AOMedia AV1 Codec
svc_encoder_rtc
1/*
2 * Copyright (c) 2019, Alliance for Open Media. All rights reserved.
3 *
4 * This source code is subject to the terms of the BSD 2 Clause License and
5 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6 * was not distributed with this source code in the LICENSE file, you can
7 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8 * Media Patent License 1.0 was not distributed with this source code in the
9 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10 */
11
12// This is an example demonstrating how to implement a multi-layer AOM
13// encoding scheme for RTC video applications.
14
15#include <assert.h>
16#include <limits.h>
17#include <math.h>
18#include <stdio.h>
19#include <stdlib.h>
20#include <string.h>
21
22#include <memory>
23
24#include "config/aom_config.h"
25
26#if CONFIG_AV1_DECODER
27#include "aom/aom_decoder.h"
28#endif
29#include "aom/aom_encoder.h"
30#include "aom/aom_image.h"
31#include "aom/aom_integer.h"
32#include "aom/aomcx.h"
33#include "aom_dsp/bitwriter_buffer.h"
34#include "aom_ports/aom_timer.h"
35#include "av1/ratectrl_rtc.h"
36#include "common/args.h"
37#include "common/tools_common.h"
38#include "common/video_writer.h"
39#include "examples/encoder_util.h"
40#include "examples/multilayer_metadata.h"
41
42#define OPTION_BUFFER_SIZE 1024
43#define MAX_NUM_SPATIAL_LAYERS 4
44
45typedef struct {
46 const char *output_filename;
47 char options[OPTION_BUFFER_SIZE];
48 struct AvxInputContext input_ctx[MAX_NUM_SPATIAL_LAYERS];
49 int speed;
50 int aq_mode;
51 int layering_mode;
52 int output_obu;
53 int decode;
54 int tune_content;
55 int show_psnr;
56 bool use_external_rc;
57 bool scale_factors_explicitly_set;
58 const char *multilayer_metadata_file;
59} AppInput;
60
// Kinds of per-layer options that can be parsed from a comma-separated
// string. The values are used as indices into option_min_values[] and
// option_max_values[].
typedef enum {
  QUANTIZER = 0,
  BITRATE = 1,
  SCALE_FACTOR = 2,
  AUTO_ALT_REF = 3,
  // Number of option types; used as an array dimension.
  ALL_OPTION_TYPES = 4
} LAYER_OPTION_TYPE;
68
69static const arg_def_t outputfile =
70 ARG_DEF("o", "output", 1, "Output filename");
71static const arg_def_t frames_arg =
72 ARG_DEF("f", "frames", 1, "Number of frames to encode");
73static const arg_def_t threads_arg =
74 ARG_DEF("th", "threads", 1, "Number of threads to use");
75static const arg_def_t width_arg = ARG_DEF("w", "width", 1, "Source width");
76static const arg_def_t height_arg = ARG_DEF("h", "height", 1, "Source height");
77static const arg_def_t timebase_arg =
78 ARG_DEF("t", "timebase", 1, "Timebase (num/den)");
79static const arg_def_t bitrate_arg = ARG_DEF(
80 "b", "target-bitrate", 1, "Encoding bitrate, in kilobits per second");
81static const arg_def_t spatial_layers_arg =
82 ARG_DEF("sl", "spatial-layers", 1, "Number of spatial SVC layers");
83static const arg_def_t temporal_layers_arg =
84 ARG_DEF("tl", "temporal-layers", 1, "Number of temporal SVC layers");
85static const arg_def_t layering_mode_arg =
86 ARG_DEF("lm", "layering-mode", 1, "Temporal layering scheme.");
87static const arg_def_t kf_dist_arg =
88 ARG_DEF("k", "kf-dist", 1, "Number of frames between keyframes");
89static const arg_def_t scale_factors_arg =
90 ARG_DEF("r", "scale-factors", 1, "Scale factors (lowest to highest layer)");
91static const arg_def_t min_q_arg =
92 ARG_DEF(NULL, "min-q", 1, "Minimum quantizer");
93static const arg_def_t max_q_arg =
94 ARG_DEF(NULL, "max-q", 1, "Maximum quantizer");
95static const arg_def_t speed_arg =
96 ARG_DEF("sp", "speed", 1, "Speed configuration");
97static const arg_def_t aqmode_arg =
98 ARG_DEF("aq", "aqmode", 1, "AQ mode off/on");
99static const arg_def_t bitrates_arg =
100 ARG_DEF("bl", "bitrates", 1,
101 "Bitrates[spatial_layer * num_temporal_layer + temporal_layer]");
102static const arg_def_t dropframe_thresh_arg =
103 ARG_DEF(NULL, "drop-frame", 1, "Temporal resampling threshold (buf %)");
104static const arg_def_t error_resilient_arg =
105 ARG_DEF(NULL, "error-resilient", 1, "Error resilient flag");
106static const arg_def_t output_obu_arg =
107 ARG_DEF(NULL, "output-obu", 1,
108 "Write OBUs when set to 1. Otherwise write IVF files.");
109static const arg_def_t test_decode_arg =
110 ARG_DEF(NULL, "test-decode", 1,
111 "Attempt to test decoding the output when set to 1. Default is 1.");
112static const arg_def_t psnr_arg =
113 ARG_DEF(NULL, "psnr", -1, "Show PSNR in status line.");
114static const arg_def_t ext_rc_arg =
115 ARG_DEF(NULL, "use-ext-rc", 0, "Use external rate control.");
116static const struct arg_enum_list tune_content_enum[] = {
117 { "default", AOM_CONTENT_DEFAULT },
118 { "screen", AOM_CONTENT_SCREEN },
119 { "film", AOM_CONTENT_FILM },
120 { NULL, 0 }
121};
122static const arg_def_t tune_content_arg = ARG_DEF_ENUM(
123 NULL, "tune-content", 1, "Tune content type", tune_content_enum);
124static const arg_def_t multilayer_metadata_file_arg =
125 ARG_DEF("ml", "multilayer_metadata_file", 1,
126 "Experimental: path to multilayer metadata file");
127
#if CONFIG_AV1_HIGHBITDEPTH
// Bit depths selectable with -d / --bit-depth (high bit depth builds only).
static const struct arg_enum_list bitdepth_enum[] = {
  { "8", AOM_BITS_8 },
  { "10", AOM_BITS_10 },
  { NULL, 0 }
};

static const arg_def_t bitdepth_arg = ARG_DEF_ENUM(
    "d", "bit-depth", 1, "Bit depth for codec 8 or 10. ", bitdepth_enum);
#endif  // CONFIG_AV1_HIGHBITDEPTH
136
137static const arg_def_t *svc_args[] = {
138 &frames_arg, &outputfile, &width_arg,
139 &height_arg, &timebase_arg, &bitrate_arg,
140 &spatial_layers_arg, &kf_dist_arg, &scale_factors_arg,
141 &min_q_arg, &max_q_arg, &temporal_layers_arg,
142 &layering_mode_arg, &threads_arg, &aqmode_arg,
143#if CONFIG_AV1_HIGHBITDEPTH
144 &bitdepth_arg,
145#endif
146 &speed_arg, &bitrates_arg, &dropframe_thresh_arg,
147 &error_resilient_arg, &output_obu_arg, &test_decode_arg,
148 &tune_content_arg, &psnr_arg, NULL,
149};
150
// Clears the object |Dest| (an lvalue, not a pointer) to all-zero bytes.
#define zero(Dest) memset(&(Dest), 0, sizeof(Dest))

// Program name printed in the usage message.
static const char *exec_name;
154
155void usage_exit(void) {
156 fprintf(stderr,
157 "Usage: %s <options> input_filename [input_filename ...] -o "
158 "output_filename\n",
159 exec_name);
160 fprintf(stderr, "Options:\n");
161 arg_show_usage(stderr, svc_args);
162 fprintf(
163 stderr,
164 "Input files must be y4m or yuv.\n"
165 "If multiple input files are specified, they correspond to spatial "
166 "layers, and there should be as many as there are spatial layers.\n"
167 "All input files must have the same width, height, frame rate and number "
168 "of frames.\n"
169 "If only one file is specified, it is used for all spatial layers.\n");
170 exit(EXIT_FAILURE);
171}
172
// Returns 1 when the first four bytes of |detect| are the Y4M magic "YUV4",
// 0 otherwise.
static int file_is_y4m(const char detect[4]) {
  static const char kY4mMagic[4] = { 'Y', 'U', 'V', '4' };
  if (memcmp(detect, kY4mMagic, sizeof(kY4mMagic)) != 0) {
    return 0;
  }
  return 1;
}
176
// Returns 1 when the first four bytes of |detect| are the IVF signature
// "DKIF", 0 otherwise.
static int fourcc_is_ivf(const char detect[4]) {
  return memcmp(detect, "DKIF", 4) == 0 ? 1 : 0;
}
183
184static const int option_max_values[ALL_OPTION_TYPES] = { 63, INT_MAX, INT_MAX,
185 1 };
186
187static const int option_min_values[ALL_OPTION_TYPES] = { 0, 0, 1, 0 };
188
189static void open_input_file(struct AvxInputContext *input,
191 /* Parse certain options from the input file, if possible */
192 input->file = strcmp(input->filename, "-") ? fopen(input->filename, "rb")
193 : set_binary_mode(stdin);
194
195 if (!input->file) fatal("Failed to open input file");
196
197 if (!fseeko(input->file, 0, SEEK_END)) {
198 /* Input file is seekable. Figure out how long it is, so we can get
199 * progress info.
200 */
201 input->length = ftello(input->file);
202 rewind(input->file);
203 }
204
205 /* Default to 1:1 pixel aspect ratio. */
206 input->pixel_aspect_ratio.numerator = 1;
207 input->pixel_aspect_ratio.denominator = 1;
208
209 /* For RAW input sources, these bytes will applied on the first frame
210 * in read_frame().
211 */
212 input->detect.buf_read = fread(input->detect.buf, 1, 4, input->file);
213 input->detect.position = 0;
214
215 if (input->detect.buf_read == 4 && file_is_y4m(input->detect.buf)) {
216 if (y4m_input_open(&input->y4m, input->file, input->detect.buf, 4, csp,
217 input->only_i420) >= 0) {
218 input->file_type = FILE_TYPE_Y4M;
219 input->width = input->y4m.pic_w;
220 input->height = input->y4m.pic_h;
221 input->pixel_aspect_ratio.numerator = input->y4m.par_n;
222 input->pixel_aspect_ratio.denominator = input->y4m.par_d;
223 input->framerate.numerator = input->y4m.fps_n;
224 input->framerate.denominator = input->y4m.fps_d;
225 input->fmt = input->y4m.aom_fmt;
226 input->bit_depth = static_cast<aom_bit_depth_t>(input->y4m.bit_depth);
227 } else {
228 fatal("Unsupported Y4M stream.");
229 }
230 } else if (input->detect.buf_read == 4 && fourcc_is_ivf(input->detect.buf)) {
231 fatal("IVF is not supported as input.");
232 } else {
233 input->file_type = FILE_TYPE_RAW;
234 }
235}
236
237static aom_codec_err_t extract_option(LAYER_OPTION_TYPE type, char *input,
238 int *value0, int *value1) {
239 if (type == SCALE_FACTOR) {
240 *value0 = (int)strtol(input, &input, 10);
241 if (*input++ != '/') return AOM_CODEC_INVALID_PARAM;
242 *value1 = (int)strtol(input, &input, 10);
243
244 if (*value0 < option_min_values[SCALE_FACTOR] ||
245 *value1 < option_min_values[SCALE_FACTOR] ||
246 *value0 > option_max_values[SCALE_FACTOR] ||
247 *value1 > option_max_values[SCALE_FACTOR] ||
248 *value0 > *value1) // num shouldn't be greater than den
250 } else {
251 *value0 = atoi(input);
252 if (*value0 < option_min_values[type] || *value0 > option_max_values[type])
254 }
255 return AOM_CODEC_OK;
256}
257
258static aom_codec_err_t parse_layer_options_from_string(
259 aom_svc_params_t *svc_params, LAYER_OPTION_TYPE type, const char *input,
260 int *option0, int *option1) {
262 char *input_string;
263 char *token;
264 const char *delim = ",";
265 int num_layers = svc_params->number_spatial_layers;
266 int i = 0;
267
268 if (type == BITRATE)
269 num_layers =
270 svc_params->number_spatial_layers * svc_params->number_temporal_layers;
271
272 if (input == NULL || option0 == NULL ||
273 (option1 == NULL && type == SCALE_FACTOR))
275
276 const size_t input_length = strlen(input);
277 input_string = reinterpret_cast<char *>(malloc(input_length + 1));
278 if (input_string == NULL) return AOM_CODEC_MEM_ERROR;
279 memcpy(input_string, input, input_length + 1);
280 token = strtok(input_string, delim); // NOLINT
281 for (i = 0; i < num_layers; ++i) {
282 if (token != NULL) {
283 res = extract_option(type, token, option0 + i, option1 + i);
284 if (res != AOM_CODEC_OK) break;
285 token = strtok(NULL, delim); // NOLINT
286 } else {
288 break;
289 }
290 }
291 free(input_string);
292 return res;
293}
294
295static void parse_command_line(int argc, const char **argv_,
296 AppInput *app_input,
297 aom_svc_params_t *svc_params,
298 aom_codec_enc_cfg_t *enc_cfg) {
299 struct arg arg;
300 char **argv = NULL;
301 char **argi = NULL;
302 char **argj = NULL;
303 char string_options[1024] = { 0 };
304
305 // Default settings
306 svc_params->number_spatial_layers = 1;
307 svc_params->number_temporal_layers = 1;
308 app_input->layering_mode = 0;
309 app_input->output_obu = 0;
310 app_input->decode = 1;
311 enc_cfg->g_threads = 1;
312 enc_cfg->rc_end_usage = AOM_CBR;
313
314 // process command line options
315 argv = argv_dup(argc - 1, argv_ + 1);
316 if (!argv) {
317 fprintf(stderr, "Error allocating argument list\n");
318 exit(EXIT_FAILURE);
319 }
320 for (argi = argj = argv; (*argj = *argi); argi += arg.argv_step) {
321 arg.argv_step = 1;
322
323 if (arg_match(&arg, &outputfile, argi)) {
324 app_input->output_filename = arg.val;
325 } else if (arg_match(&arg, &width_arg, argi)) {
326 enc_cfg->g_w = arg_parse_uint(&arg);
327 } else if (arg_match(&arg, &height_arg, argi)) {
328 enc_cfg->g_h = arg_parse_uint(&arg);
329 } else if (arg_match(&arg, &timebase_arg, argi)) {
330 enc_cfg->g_timebase = arg_parse_rational(&arg);
331 } else if (arg_match(&arg, &bitrate_arg, argi)) {
332 enc_cfg->rc_target_bitrate = arg_parse_uint(&arg);
333 } else if (arg_match(&arg, &spatial_layers_arg, argi)) {
334 svc_params->number_spatial_layers = arg_parse_uint(&arg);
335 } else if (arg_match(&arg, &temporal_layers_arg, argi)) {
336 svc_params->number_temporal_layers = arg_parse_uint(&arg);
337 } else if (arg_match(&arg, &speed_arg, argi)) {
338 app_input->speed = arg_parse_uint(&arg);
339 if (app_input->speed > 11) {
340 aom_tools_warn("Mapping speed %d to speed 11.\n", app_input->speed);
341 }
342 } else if (arg_match(&arg, &aqmode_arg, argi)) {
343 app_input->aq_mode = arg_parse_uint(&arg);
344 } else if (arg_match(&arg, &threads_arg, argi)) {
345 enc_cfg->g_threads = arg_parse_uint(&arg);
346 } else if (arg_match(&arg, &layering_mode_arg, argi)) {
347 app_input->layering_mode = arg_parse_int(&arg);
348 } else if (arg_match(&arg, &kf_dist_arg, argi)) {
349 enc_cfg->kf_min_dist = arg_parse_uint(&arg);
350 enc_cfg->kf_max_dist = enc_cfg->kf_min_dist;
351 } else if (arg_match(&arg, &scale_factors_arg, argi)) {
352 aom_codec_err_t res = parse_layer_options_from_string(
353 svc_params, SCALE_FACTOR, arg.val, svc_params->scaling_factor_num,
354 svc_params->scaling_factor_den);
355 app_input->scale_factors_explicitly_set = true;
356 if (res != AOM_CODEC_OK) {
357 die("Failed to parse scale factors: %s\n",
359 }
360 } else if (arg_match(&arg, &min_q_arg, argi)) {
361 enc_cfg->rc_min_quantizer = arg_parse_uint(&arg);
362 } else if (arg_match(&arg, &max_q_arg, argi)) {
363 enc_cfg->rc_max_quantizer = arg_parse_uint(&arg);
364#if CONFIG_AV1_HIGHBITDEPTH
365 } else if (arg_match(&arg, &bitdepth_arg, argi)) {
366 enc_cfg->g_bit_depth =
367 static_cast<aom_bit_depth_t>(arg_parse_enum_or_int(&arg));
368 switch (enc_cfg->g_bit_depth) {
369 case AOM_BITS_8:
370 enc_cfg->g_input_bit_depth = 8;
371 enc_cfg->g_profile = 0;
372 break;
373 case AOM_BITS_10:
374 enc_cfg->g_input_bit_depth = 10;
375 enc_cfg->g_profile = 0;
376 break;
377 default:
378 die("Error: Invalid bit depth selected (%d)\n", enc_cfg->g_bit_depth);
379 }
380#endif // CONFIG_VP9_HIGHBITDEPTH
381 } else if (arg_match(&arg, &dropframe_thresh_arg, argi)) {
382 enc_cfg->rc_dropframe_thresh = arg_parse_uint(&arg);
383 } else if (arg_match(&arg, &error_resilient_arg, argi)) {
384 enc_cfg->g_error_resilient = arg_parse_uint(&arg);
385 if (enc_cfg->g_error_resilient != 0 && enc_cfg->g_error_resilient != 1)
386 die("Invalid value for error resilient (0, 1): %d.",
387 enc_cfg->g_error_resilient);
388 } else if (arg_match(&arg, &output_obu_arg, argi)) {
389 app_input->output_obu = arg_parse_uint(&arg);
390 if (app_input->output_obu != 0 && app_input->output_obu != 1)
391 die("Invalid value for obu output flag (0, 1): %d.",
392 app_input->output_obu);
393 } else if (arg_match(&arg, &test_decode_arg, argi)) {
394 app_input->decode = arg_parse_uint(&arg);
395 if (app_input->decode != 0 && app_input->decode != 1)
396 die("Invalid value for test decode flag (0, 1): %d.",
397 app_input->decode);
398 } else if (arg_match(&arg, &tune_content_arg, argi)) {
399 app_input->tune_content = arg_parse_enum_or_int(&arg);
400 printf("tune content %d\n", app_input->tune_content);
401 } else if (arg_match(&arg, &psnr_arg, argi)) {
402 app_input->show_psnr = 1;
403 } else if (arg_match(&arg, &ext_rc_arg, argi)) {
404 app_input->use_external_rc = true;
405 } else if (arg_match(&arg, &multilayer_metadata_file_arg, argi)) {
406 app_input->multilayer_metadata_file = arg.val;
407 } else {
408 ++argj;
409 }
410 }
411
412 // Total bitrate needs to be parsed after the number of layers.
413 for (argi = argj = argv; (*argj = *argi); argi += arg.argv_step) {
414 arg.argv_step = 1;
415 if (arg_match(&arg, &bitrates_arg, argi)) {
416 aom_codec_err_t res = parse_layer_options_from_string(
417 svc_params, BITRATE, arg.val, svc_params->layer_target_bitrate, NULL);
418 if (res != AOM_CODEC_OK) {
419 die("Failed to parse bitrates: %s\n", aom_codec_err_to_string(res));
420 }
421 } else {
422 ++argj;
423 }
424 }
425
426 // There will be a space in front of the string options
427 if (strlen(string_options) > 0)
428 strncpy(app_input->options, string_options, OPTION_BUFFER_SIZE);
429
430 // Check for unrecognized options
431 for (argi = argv; *argi; ++argi)
432 if (argi[0][0] == '-' && strlen(argi[0]) > 1)
433 die("Error: Unrecognized option %s\n", *argi);
434
435 if (argv[0] == NULL) {
436 usage_exit();
437 }
438
439 int input_count = 0;
440 while (argv[input_count] != NULL && input_count < MAX_NUM_SPATIAL_LAYERS) {
441 app_input->input_ctx[input_count].filename = argv[input_count];
442 ++input_count;
443 }
444 if (input_count > 1 && input_count != svc_params->number_spatial_layers) {
445 die("Error: Number of input files does not match number of spatial layers");
446 }
447 if (argv[input_count] != NULL) {
448 die("Error: Too many input files specified, there should be at most %d",
449 MAX_NUM_SPATIAL_LAYERS);
450 }
451
452 free(argv);
453
454 for (int i = 0; i < input_count; ++i) {
455 open_input_file(&app_input->input_ctx[i], AOM_CSP_UNKNOWN);
456 if (app_input->input_ctx[i].file_type == FILE_TYPE_Y4M) {
457 if (enc_cfg->g_w == 0 || enc_cfg->g_h == 0) {
458 // Override these settings with the info from Y4M file.
459 enc_cfg->g_w = app_input->input_ctx[i].width;
460 enc_cfg->g_h = app_input->input_ctx[i].height;
461 // g_timebase is the reciprocal of frame rate.
462 enc_cfg->g_timebase.num = app_input->input_ctx[i].framerate.denominator;
463 enc_cfg->g_timebase.den = app_input->input_ctx[i].framerate.numerator;
464 } else if (enc_cfg->g_w != app_input->input_ctx[i].width ||
465 enc_cfg->g_h != app_input->input_ctx[i].height ||
466 enc_cfg->g_timebase.num !=
467 app_input->input_ctx[i].framerate.denominator ||
468 enc_cfg->g_timebase.den !=
469 app_input->input_ctx[i].framerate.numerator) {
470 die("Error: Input file dimensions and/or frame rate mismatch");
471 }
472 }
473 }
474 if (enc_cfg->g_w == 0 || enc_cfg->g_h == 0) {
475 die("Error: Input file dimensions not set, use -w and -h");
476 }
477
478 if (enc_cfg->g_w < 16 || enc_cfg->g_w % 2 || enc_cfg->g_h < 16 ||
479 enc_cfg->g_h % 2)
480 die("Invalid resolution: %d x %d\n", enc_cfg->g_w, enc_cfg->g_h);
481
482 printf(
483 "Codec %s\n"
484 "layers: %d\n"
485 "width %u, height: %u\n"
486 "num: %d, den: %d, bitrate: %u\n"
487 "gop size: %u\n",
489 svc_params->number_spatial_layers, enc_cfg->g_w, enc_cfg->g_h,
490 enc_cfg->g_timebase.num, enc_cfg->g_timebase.den,
491 enc_cfg->rc_target_bitrate, enc_cfg->kf_max_dist);
492}
493
// Number of temporal layers for each of the 12 layering modes (index = mode).
static int mode_to_num_temporal_layers[12] = {
  1, 2, 3, 3,  // modes 0 - 3
  2, 1, 1, 3,  // modes 4 - 7
  3, 3, 3, 3,  // modes 8 - 11
};
// Number of spatial layers for each of the 12 layering modes (index = mode).
static int mode_to_num_spatial_layers[12] = {
  1, 1, 1, 1,  // modes 0 - 3
  1, 2, 3, 2,  // modes 4 - 7
  3, 3, 3, 3,  // modes 8 - 11
};
500
501// For rate control encoding stats.
502struct RateControlMetrics {
503 // Number of input frames per layer.
504 int layer_input_frames[AOM_MAX_TS_LAYERS];
505 // Number of encoded non-key frames per layer.
506 int layer_enc_frames[AOM_MAX_TS_LAYERS];
507 // Framerate per layer layer (cumulative).
508 double layer_framerate[AOM_MAX_TS_LAYERS];
509 // Target average frame size per layer (per-frame-bandwidth per layer).
510 double layer_pfb[AOM_MAX_LAYERS];
511 // Actual average frame size per layer.
512 double layer_avg_frame_size[AOM_MAX_LAYERS];
513 // Average rate mismatch per layer (|target - actual| / target).
514 double layer_avg_rate_mismatch[AOM_MAX_LAYERS];
515 // Actual encoding bitrate per layer (cumulative across temporal layers).
516 double layer_encoding_bitrate[AOM_MAX_LAYERS];
517 // Average of the short-time encoder actual bitrate.
518 // TODO(marpan): Should we add these short-time stats for each layer?
519 double avg_st_encoding_bitrate;
520 // Variance of the short-time encoder actual bitrate.
521 double variance_st_encoding_bitrate;
522 // Window (number of frames) for computing short-timee encoding bitrate.
523 int window_size;
524 // Number of window measurements.
525 int window_count;
526 int layer_target_bitrate[AOM_MAX_LAYERS];
527};
528
// Number of reference buffer slots that can be refreshed.
static const int REF_FRAMES = 8;

// Number of inter reference names available to a frame.
static const int INTER_REFS_PER_FRAME = 7;

// Names of the references used in this example encoder; the values index the
// ref_idx[] and reference[] arrays of the reference frame configuration.
enum {
  SVC_LAST_FRAME = 0,
  SVC_LAST2_FRAME = 1,
  SVC_LAST3_FRAME = 2,
  SVC_GOLDEN_FRAME = 3,
  SVC_BWDREF_FRAME = 4,
  SVC_ALTREF2_FRAME = 5,
  SVC_ALTREF_FRAME = 6
};
543
544static int read_frame(struct AvxInputContext *input_ctx, aom_image_t *img) {
545 FILE *f = input_ctx->file;
546 y4m_input *y4m = &input_ctx->y4m;
547 int shortread = 0;
548
549 if (input_ctx->file_type == FILE_TYPE_Y4M) {
550 if (y4m_input_fetch_frame(y4m, f, img) < 1) return 0;
551 } else {
552 shortread = read_yuv_frame(input_ctx, img);
553 }
554
555 return !shortread;
556}
557
558static void close_input_file(struct AvxInputContext *input) {
559 fclose(input->file);
560 if (input->file_type == FILE_TYPE_Y4M) y4m_input_close(&input->y4m);
561}
562
563// Note: these rate control metrics assume only 1 key frame in the
564// sequence (i.e., first frame only). So for temporal pattern# 7
565// (which has key frame for every frame on base layer), the metrics
566// computation will be off/wrong.
567// TODO(marpan): Update these metrics to account for multiple key frames
568// in the stream.
569static void set_rate_control_metrics(struct RateControlMetrics *rc,
570 double framerate, int ss_number_layers,
571 int ts_number_layers) {
572 int ts_rate_decimator[AOM_MAX_TS_LAYERS] = { 1 };
573 ts_rate_decimator[0] = 1;
574 if (ts_number_layers == 2) {
575 ts_rate_decimator[0] = 2;
576 ts_rate_decimator[1] = 1;
577 }
578 if (ts_number_layers == 3) {
579 ts_rate_decimator[0] = 4;
580 ts_rate_decimator[1] = 2;
581 ts_rate_decimator[2] = 1;
582 }
583 // Set the layer (cumulative) framerate and the target layer (non-cumulative)
584 // per-frame-bandwidth, for the rate control encoding stats below.
585 for (int sl = 0; sl < ss_number_layers; ++sl) {
586 int i = sl * ts_number_layers;
587 rc->layer_framerate[0] = framerate / ts_rate_decimator[0];
588 rc->layer_pfb[i] =
589 1000.0 * rc->layer_target_bitrate[i] / rc->layer_framerate[0];
590 for (int tl = 0; tl < ts_number_layers; ++tl) {
591 i = sl * ts_number_layers + tl;
592 if (tl > 0) {
593 rc->layer_framerate[tl] = framerate / ts_rate_decimator[tl];
594 rc->layer_pfb[i] =
595 1000.0 *
596 (rc->layer_target_bitrate[i] - rc->layer_target_bitrate[i - 1]) /
597 (rc->layer_framerate[tl] - rc->layer_framerate[tl - 1]);
598 }
599 rc->layer_input_frames[tl] = 0;
600 rc->layer_enc_frames[tl] = 0;
601 rc->layer_encoding_bitrate[i] = 0.0;
602 rc->layer_avg_frame_size[i] = 0.0;
603 rc->layer_avg_rate_mismatch[i] = 0.0;
604 }
605 }
606 rc->window_count = 0;
607 rc->window_size = 15;
608 rc->avg_st_encoding_bitrate = 0.0;
609 rc->variance_st_encoding_bitrate = 0.0;
610}
611
612static void printout_rate_control_summary(struct RateControlMetrics *rc,
613 int frame_cnt, int ss_number_layers,
614 int ts_number_layers) {
615 int tot_num_frames = 0;
616 double perc_fluctuation = 0.0;
617 printf("Total number of processed frames: %d\n\n", frame_cnt - 1);
618 printf("Rate control layer stats for %d layer(s):\n\n", ts_number_layers);
619 for (int sl = 0; sl < ss_number_layers; ++sl) {
620 tot_num_frames = 0;
621 for (int tl = 0; tl < ts_number_layers; ++tl) {
622 int i = sl * ts_number_layers + tl;
623 const int num_dropped =
624 tl > 0 ? rc->layer_input_frames[tl] - rc->layer_enc_frames[tl]
625 : rc->layer_input_frames[tl] - rc->layer_enc_frames[tl] - 1;
626 tot_num_frames += rc->layer_input_frames[tl];
627 rc->layer_encoding_bitrate[i] = 0.001 * rc->layer_framerate[tl] *
628 rc->layer_encoding_bitrate[i] /
629 tot_num_frames;
630 rc->layer_avg_frame_size[i] =
631 rc->layer_avg_frame_size[i] / rc->layer_enc_frames[tl];
632 rc->layer_avg_rate_mismatch[i] =
633 100.0 * rc->layer_avg_rate_mismatch[i] / rc->layer_enc_frames[tl];
634 printf("For layer#: %d %d \n", sl, tl);
635 printf("Bitrate (target vs actual): %d %f\n", rc->layer_target_bitrate[i],
636 rc->layer_encoding_bitrate[i]);
637 printf("Average frame size (target vs actual): %f %f\n", rc->layer_pfb[i],
638 rc->layer_avg_frame_size[i]);
639 printf("Average rate_mismatch: %f\n", rc->layer_avg_rate_mismatch[i]);
640 printf(
641 "Number of input frames, encoded (non-key) frames, "
642 "and perc dropped frames: %d %d %f\n",
643 rc->layer_input_frames[tl], rc->layer_enc_frames[tl],
644 100.0 * num_dropped / rc->layer_input_frames[tl]);
645 printf("\n");
646 }
647 }
648 rc->avg_st_encoding_bitrate = rc->avg_st_encoding_bitrate / rc->window_count;
649 rc->variance_st_encoding_bitrate =
650 rc->variance_st_encoding_bitrate / rc->window_count -
651 (rc->avg_st_encoding_bitrate * rc->avg_st_encoding_bitrate);
652 perc_fluctuation = 100.0 * sqrt(rc->variance_st_encoding_bitrate) /
653 rc->avg_st_encoding_bitrate;
654 printf("Short-time stats, for window of %d frames:\n", rc->window_size);
655 printf("Average, rms-variance, and percent-fluct: %f %f %f\n",
656 rc->avg_st_encoding_bitrate, sqrt(rc->variance_st_encoding_bitrate),
657 perc_fluctuation);
658 if (frame_cnt - 1 != tot_num_frames)
659 die("Error: Number of input frames not equal to output!\n");
660}
661
662// Layer pattern configuration.
663static void set_layer_pattern(
664 int layering_mode, int superframe_cnt, aom_svc_layer_id_t *layer_id,
665 aom_svc_ref_frame_config_t *ref_frame_config,
666 aom_svc_ref_frame_comp_pred_t *ref_frame_comp_pred, int *use_svc_control,
667 int spatial_layer_id, int is_key_frame, int ksvc_mode, int speed) {
668 // Setting this flag to 1 enables simplex example of
669 // RPS (Reference Picture Selection) for 1 layer.
670 int use_rps_example = 0;
671 int i;
672 int enable_longterm_temporal_ref = 1;
673 int shift = (layering_mode == 8) ? 2 : 0;
674 int simulcast_mode = (layering_mode == 11);
675 *use_svc_control = 1;
676 layer_id->spatial_layer_id = spatial_layer_id;
677 int lag_index = 0;
678 int base_count = superframe_cnt >> 2;
679 ref_frame_comp_pred->use_comp_pred[0] = 0; // GOLDEN_LAST
680 ref_frame_comp_pred->use_comp_pred[1] = 0; // LAST2_LAST
681 ref_frame_comp_pred->use_comp_pred[2] = 0; // ALTREF_LAST
682 // Set the reference map buffer idx for the 7 references:
683 // LAST_FRAME (0), LAST2_FRAME(1), LAST3_FRAME(2), GOLDEN_FRAME(3),
684 // BWDREF_FRAME(4), ALTREF2_FRAME(5), ALTREF_FRAME(6).
685 for (i = 0; i < INTER_REFS_PER_FRAME; i++) ref_frame_config->ref_idx[i] = i;
686 for (i = 0; i < INTER_REFS_PER_FRAME; i++) ref_frame_config->reference[i] = 0;
687 for (i = 0; i < REF_FRAMES; i++) ref_frame_config->refresh[i] = 0;
688
689 if (ksvc_mode) {
690 // Same pattern as case 9, but the reference strucutre will be constrained
691 // below.
692 layering_mode = 9;
693 }
694 switch (layering_mode) {
695 case 0:
696 if (use_rps_example == 0) {
697 // 1-layer: update LAST on every frame, reference LAST.
698 layer_id->temporal_layer_id = 0;
699 layer_id->spatial_layer_id = 0;
700 ref_frame_config->refresh[0] = 1;
701 ref_frame_config->reference[SVC_LAST_FRAME] = 1;
702 } else {
703 // Pattern of 2 references (ALTREF and GOLDEN) trailing
704 // LAST by 4 and 8 frames, with some switching logic to
705 // sometimes only predict from the longer-term reference
706 //(golden here). This is simple example to test RPS
707 // (reference picture selection).
708 int last_idx = 0;
709 int last_idx_refresh = 0;
710 int gld_idx = 0;
711 int alt_ref_idx = 0;
712 int lag_alt = 4;
713 int lag_gld = 8;
714 layer_id->temporal_layer_id = 0;
715 layer_id->spatial_layer_id = 0;
716 int sh = 8; // slots 0 - 7.
717 // Moving index slot for last: 0 - (sh - 1)
718 if (superframe_cnt > 1) last_idx = (superframe_cnt - 1) % sh;
719 // Moving index for refresh of last: one ahead for next frame.
720 last_idx_refresh = superframe_cnt % sh;
721 // Moving index for gld_ref, lag behind current by lag_gld
722 if (superframe_cnt > lag_gld) gld_idx = (superframe_cnt - lag_gld) % sh;
723 // Moving index for alt_ref, lag behind LAST by lag_alt frames.
724 if (superframe_cnt > lag_alt)
725 alt_ref_idx = (superframe_cnt - lag_alt) % sh;
726 // Set the ref_idx.
727 // Default all references to slot for last.
728 for (i = 0; i < INTER_REFS_PER_FRAME; i++)
729 ref_frame_config->ref_idx[i] = last_idx;
730 // Set the ref_idx for the relevant references.
731 ref_frame_config->ref_idx[SVC_LAST_FRAME] = last_idx;
732 ref_frame_config->ref_idx[SVC_LAST2_FRAME] = last_idx_refresh;
733 ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = gld_idx;
734 ref_frame_config->ref_idx[SVC_ALTREF_FRAME] = alt_ref_idx;
735 // Refresh this slot, which will become LAST on next frame.
736 ref_frame_config->refresh[last_idx_refresh] = 1;
737 // Reference LAST, ALTREF, and GOLDEN
738 ref_frame_config->reference[SVC_LAST_FRAME] = 1;
739 ref_frame_config->reference[SVC_ALTREF_FRAME] = 1;
740 ref_frame_config->reference[SVC_GOLDEN_FRAME] = 1;
741 // Switch to only GOLDEN every 300 frames.
742 if (superframe_cnt % 200 == 0 && superframe_cnt > 0) {
743 ref_frame_config->reference[SVC_LAST_FRAME] = 0;
744 ref_frame_config->reference[SVC_ALTREF_FRAME] = 0;
745 ref_frame_config->reference[SVC_GOLDEN_FRAME] = 1;
746 // Test if the long-term is LAST instead, this is just a renaming
747 // but its tests if encoder behaves the same, whether its
748 // LAST or GOLDEN.
749 if (superframe_cnt % 400 == 0 && superframe_cnt > 0) {
750 ref_frame_config->ref_idx[SVC_LAST_FRAME] = gld_idx;
751 ref_frame_config->reference[SVC_LAST_FRAME] = 1;
752 ref_frame_config->reference[SVC_ALTREF_FRAME] = 0;
753 ref_frame_config->reference[SVC_GOLDEN_FRAME] = 0;
754 }
755 }
756 }
757 break;
758 case 1:
759 // 2-temporal layer.
760 // 1 3 5
761 // 0 2 4
762 // Keep golden fixed at slot 3.
763 base_count = superframe_cnt >> 1;
764 ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = 3;
765 // Cyclically refresh slots 5, 6, 7, for lag alt ref.
766 lag_index = 5;
767 if (base_count > 0) {
768 lag_index = 5 + (base_count % 3);
769 if (superframe_cnt % 2 != 0) lag_index = 5 + ((base_count + 1) % 3);
770 }
771 // Set the altref slot to lag_index.
772 ref_frame_config->ref_idx[SVC_ALTREF_FRAME] = lag_index;
773 if (superframe_cnt % 2 == 0) {
774 layer_id->temporal_layer_id = 0;
775 // Update LAST on layer 0, reference LAST.
776 ref_frame_config->refresh[0] = 1;
777 ref_frame_config->reference[SVC_LAST_FRAME] = 1;
778 // Refresh lag_index slot, needed for lagging golen.
779 ref_frame_config->refresh[lag_index] = 1;
780 // Refresh GOLDEN every x base layer frames.
781 if (base_count % 32 == 0) ref_frame_config->refresh[3] = 1;
782 } else {
783 layer_id->temporal_layer_id = 1;
784 // No updates on layer 1, reference LAST (TL0).
785 ref_frame_config->reference[SVC_LAST_FRAME] = 1;
786 }
787 // Always reference golden and altref on TL0.
788 if (layer_id->temporal_layer_id == 0) {
789 ref_frame_config->reference[SVC_GOLDEN_FRAME] = 1;
790 ref_frame_config->reference[SVC_ALTREF_FRAME] = 1;
791 }
792 break;
793 case 2:
794 // 3-temporal layer:
795 // 1 3 5 7
796 // 2 6
797 // 0 4 8
798 if (superframe_cnt % 4 == 0) {
799 // Base layer.
800 layer_id->temporal_layer_id = 0;
801 // Update LAST on layer 0, reference LAST.
802 ref_frame_config->refresh[0] = 1;
803 ref_frame_config->reference[SVC_LAST_FRAME] = 1;
804 } else if ((superframe_cnt - 1) % 4 == 0) {
805 layer_id->temporal_layer_id = 2;
806 // First top layer: no updates, only reference LAST (TL0).
807 ref_frame_config->reference[SVC_LAST_FRAME] = 1;
808 } else if ((superframe_cnt - 2) % 4 == 0) {
809 layer_id->temporal_layer_id = 1;
810 // Middle layer (TL1): update LAST2, only reference LAST (TL0).
811 ref_frame_config->refresh[1] = 1;
812 ref_frame_config->reference[SVC_LAST_FRAME] = 1;
813 } else if ((superframe_cnt - 3) % 4 == 0) {
814 layer_id->temporal_layer_id = 2;
815 // Second top layer: no updates, only reference LAST.
816 // Set buffer idx for LAST to slot 1, since that was the slot
817 // updated in previous frame. So LAST is TL1 frame.
818 ref_frame_config->ref_idx[SVC_LAST_FRAME] = 1;
819 ref_frame_config->ref_idx[SVC_LAST2_FRAME] = 0;
820 ref_frame_config->reference[SVC_LAST_FRAME] = 1;
821 }
822 break;
823 case 3:
824 // 3 TL, same as above, except allow for predicting
825 // off 2 more references (GOLDEN and ALTREF), with
826 // GOLDEN updated periodically, and ALTREF lagging from
827 // LAST from ~4 frames. Both GOLDEN and ALTREF
828 // can only be updated on base temporal layer.
829
830 // Keep golden fixed at slot 3.
831 ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = 3;
832 // Cyclically refresh slots 5, 6, 7, for lag altref.
833 lag_index = 5;
834 if (base_count > 0) {
835 lag_index = 5 + (base_count % 3);
836 if (superframe_cnt % 4 != 0) lag_index = 5 + ((base_count + 1) % 3);
837 }
838 // Set the altref slot to lag_index.
839 ref_frame_config->ref_idx[SVC_ALTREF_FRAME] = lag_index;
840 if (superframe_cnt % 4 == 0) {
841 // Base layer.
842 layer_id->temporal_layer_id = 0;
843 // Update LAST on layer 0, reference LAST.
844 ref_frame_config->refresh[0] = 1;
845 ref_frame_config->reference[SVC_LAST_FRAME] = 1;
846 // Refresh GOLDEN every x ~10 base layer frames.
847 if (base_count % 10 == 0) ref_frame_config->refresh[3] = 1;
848 // Refresh lag_index slot, needed for lagging altref.
849 ref_frame_config->refresh[lag_index] = 1;
850 } else if ((superframe_cnt - 1) % 4 == 0) {
851 layer_id->temporal_layer_id = 2;
852 // First top layer: no updates, only reference LAST (TL0).
853 ref_frame_config->reference[SVC_LAST_FRAME] = 1;
854 } else if ((superframe_cnt - 2) % 4 == 0) {
855 layer_id->temporal_layer_id = 1;
856 // Middle layer (TL1): update LAST2, only reference LAST (TL0).
857 ref_frame_config->refresh[1] = 1;
858 ref_frame_config->reference[SVC_LAST_FRAME] = 1;
859 } else if ((superframe_cnt - 3) % 4 == 0) {
860 layer_id->temporal_layer_id = 2;
861 // Second top layer: no updates, only reference LAST.
862 // Set buffer idx for LAST to slot 1, since that was the slot
863 // updated in previous frame. So LAST is TL1 frame.
864 ref_frame_config->ref_idx[SVC_LAST_FRAME] = 1;
865 ref_frame_config->ref_idx[SVC_LAST2_FRAME] = 0;
866 ref_frame_config->reference[SVC_LAST_FRAME] = 1;
867 }
868 // Every frame can reference GOLDEN AND ALTREF.
869 ref_frame_config->reference[SVC_GOLDEN_FRAME] = 1;
870 ref_frame_config->reference[SVC_ALTREF_FRAME] = 1;
871 // Allow for compound prediction for LAST-ALTREF and LAST-GOLDEN.
872 if (speed >= 7) {
873 ref_frame_comp_pred->use_comp_pred[2] = 1;
874 ref_frame_comp_pred->use_comp_pred[0] = 1;
875 }
876 break;
877 case 4:
878 // 3-temporal layer: but middle layer updates GF, so 2nd TL2 will
879 // only reference GF (not LAST). Other frames only reference LAST.
880 // 1 3 5 7
881 // 2 6
882 // 0 4 8
883 if (superframe_cnt % 4 == 0) {
884 // Base layer.
885 layer_id->temporal_layer_id = 0;
886 // Update LAST on layer 0, only reference LAST.
887 ref_frame_config->refresh[0] = 1;
888 ref_frame_config->reference[SVC_LAST_FRAME] = 1;
889 } else if ((superframe_cnt - 1) % 4 == 0) {
890 layer_id->temporal_layer_id = 2;
891 // First top layer: no updates, only reference LAST (TL0).
892 ref_frame_config->reference[SVC_LAST_FRAME] = 1;
893 } else if ((superframe_cnt - 2) % 4 == 0) {
894 layer_id->temporal_layer_id = 1;
895 // Middle layer (TL1): update GF, only reference LAST (TL0).
896 ref_frame_config->refresh[3] = 1;
897 ref_frame_config->reference[SVC_LAST_FRAME] = 1;
898 } else if ((superframe_cnt - 3) % 4 == 0) {
899 layer_id->temporal_layer_id = 2;
900 // Second top layer: no updates, only reference GF.
901 ref_frame_config->reference[SVC_GOLDEN_FRAME] = 1;
902 }
903 break;
904 case 5:
905 // 2 spatial layers, 1 temporal.
906 layer_id->temporal_layer_id = 0;
907 if (layer_id->spatial_layer_id == 0) {
908 // Reference LAST, update LAST.
909 ref_frame_config->refresh[0] = 1;
910 ref_frame_config->reference[SVC_LAST_FRAME] = 1;
911 } else if (layer_id->spatial_layer_id == 1) {
912 // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 1
913 // and GOLDEN to slot 0. Update slot 1 (LAST).
914 ref_frame_config->ref_idx[SVC_LAST_FRAME] = 1;
915 ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = 0;
916 ref_frame_config->refresh[1] = 1;
917 ref_frame_config->reference[SVC_LAST_FRAME] = 1;
918 ref_frame_config->reference[SVC_GOLDEN_FRAME] = 1;
919 }
920 break;
921 case 6:
922 // 3 spatial layers, 1 temporal.
923 // Note for this case, we set the buffer idx for all references to be
924 // either LAST or GOLDEN, which are always valid references, since decoder
925 // will check if any of the 7 references is valid scale in
926 // valid_ref_frame_size().
927 layer_id->temporal_layer_id = 0;
928 if (layer_id->spatial_layer_id == 0) {
929 // Reference LAST, update LAST. Set all buffer_idx to 0.
930 for (i = 0; i < INTER_REFS_PER_FRAME; i++)
931 ref_frame_config->ref_idx[i] = 0;
932 ref_frame_config->refresh[0] = 1;
933 ref_frame_config->reference[SVC_LAST_FRAME] = 1;
934 } else if (layer_id->spatial_layer_id == 1) {
935 // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 1
936 // and GOLDEN (and all other refs) to slot 0.
937 // Update slot 1 (LAST).
938 for (i = 0; i < INTER_REFS_PER_FRAME; i++)
939 ref_frame_config->ref_idx[i] = 0;
940 ref_frame_config->ref_idx[SVC_LAST_FRAME] = 1;
941 ref_frame_config->refresh[1] = 1;
942 ref_frame_config->reference[SVC_LAST_FRAME] = 1;
943 ref_frame_config->reference[SVC_GOLDEN_FRAME] = 1;
944 } else if (layer_id->spatial_layer_id == 2) {
945 // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 2
946 // and GOLDEN (and all other refs) to slot 1.
947 // Update slot 2 (LAST).
948 for (i = 0; i < INTER_REFS_PER_FRAME; i++)
949 ref_frame_config->ref_idx[i] = 1;
950 ref_frame_config->ref_idx[SVC_LAST_FRAME] = 2;
951 ref_frame_config->refresh[2] = 1;
952 ref_frame_config->reference[SVC_LAST_FRAME] = 1;
953 ref_frame_config->reference[SVC_GOLDEN_FRAME] = 1;
954 // For 3 spatial layer case: allow for top spatial layer to use
955 // additional temporal reference. Update every 10 frames.
956 if (enable_longterm_temporal_ref) {
957 ref_frame_config->ref_idx[SVC_ALTREF_FRAME] = REF_FRAMES - 1;
958 ref_frame_config->reference[SVC_ALTREF_FRAME] = 1;
959 if (base_count % 10 == 0)
960 ref_frame_config->refresh[REF_FRAMES - 1] = 1;
961 }
962 }
963 break;
964 case 7:
965 // 2 spatial and 3 temporal layer.
966 ref_frame_config->reference[SVC_LAST_FRAME] = 1;
967 if (superframe_cnt % 4 == 0) {
968 // Base temporal layer
969 layer_id->temporal_layer_id = 0;
970 if (layer_id->spatial_layer_id == 0) {
971 // Reference LAST, update LAST
972 // Set all buffer_idx to 0
973 for (i = 0; i < INTER_REFS_PER_FRAME; i++)
974 ref_frame_config->ref_idx[i] = 0;
975 ref_frame_config->refresh[0] = 1;
976 } else if (layer_id->spatial_layer_id == 1) {
977 // Reference LAST and GOLDEN.
978 for (i = 0; i < INTER_REFS_PER_FRAME; i++)
979 ref_frame_config->ref_idx[i] = 0;
980 ref_frame_config->ref_idx[SVC_LAST_FRAME] = 1;
981 ref_frame_config->refresh[1] = 1;
982 }
983 } else if ((superframe_cnt - 1) % 4 == 0) {
984 // First top temporal enhancement layer.
985 layer_id->temporal_layer_id = 2;
986 if (layer_id->spatial_layer_id == 0) {
987 for (i = 0; i < INTER_REFS_PER_FRAME; i++)
988 ref_frame_config->ref_idx[i] = 0;
989 ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = 3;
990 ref_frame_config->refresh[3] = 1;
991 } else if (layer_id->spatial_layer_id == 1) {
992 // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 1,
993 // GOLDEN (and all other refs) to slot 3.
994 // No update.
995 for (i = 0; i < INTER_REFS_PER_FRAME; i++)
996 ref_frame_config->ref_idx[i] = 3;
997 ref_frame_config->ref_idx[SVC_LAST_FRAME] = 1;
998 }
999 } else if ((superframe_cnt - 2) % 4 == 0) {
1000 // Middle temporal enhancement layer.
1001 layer_id->temporal_layer_id = 1;
1002 if (layer_id->spatial_layer_id == 0) {
1003 // Reference LAST.
1004 // Set all buffer_idx to 0.
1005 // Set GOLDEN to slot 5 and update slot 5.
1006 for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1007 ref_frame_config->ref_idx[i] = 0;
1008 ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = 5 - shift;
1009 ref_frame_config->refresh[5 - shift] = 1;
1010 } else if (layer_id->spatial_layer_id == 1) {
1011 // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 1,
1012 // GOLDEN (and all other refs) to slot 5.
1013 // Set LAST3 to slot 6 and update slot 6.
1014 for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1015 ref_frame_config->ref_idx[i] = 5 - shift;
1016 ref_frame_config->ref_idx[SVC_LAST_FRAME] = 1;
1017 ref_frame_config->ref_idx[SVC_LAST3_FRAME] = 6 - shift;
1018 ref_frame_config->refresh[6 - shift] = 1;
1019 }
1020 } else if ((superframe_cnt - 3) % 4 == 0) {
1021 // Second top temporal enhancement layer.
1022 layer_id->temporal_layer_id = 2;
1023 if (layer_id->spatial_layer_id == 0) {
1024 // Set LAST to slot 5 and reference LAST.
1025 // Set GOLDEN to slot 3 and update slot 3.
1026 // Set all other buffer_idx to 0.
1027 for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1028 ref_frame_config->ref_idx[i] = 0;
1029 ref_frame_config->ref_idx[SVC_LAST_FRAME] = 5 - shift;
1030 ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = 3;
1031 ref_frame_config->refresh[3] = 1;
1032 } else if (layer_id->spatial_layer_id == 1) {
1033 // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 6,
1034 // GOLDEN to slot 3. No update.
1035 for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1036 ref_frame_config->ref_idx[i] = 0;
1037 ref_frame_config->ref_idx[SVC_LAST_FRAME] = 6 - shift;
1038 ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = 3;
1039 }
1040 }
1041 break;
1042 case 8:
1043 // 3 spatial and 3 temporal layer.
1044 // Same as case 9 but with overlap in the buffer slot updates
1045 // (shift = 2). The slots 3 and 4 updated by first TL2 are
1046 // reused for update in TL1 superframe.
1047 // Note for this case, frame order hint must be disabled for
1048 // lower resolutions (operating points > 0) to be decodable.
1049 case 9:
1050 // 3 spatial and 3 temporal layer.
1051 // No overlap in buffer updates between TL2 and TL1.
1052 // TL2 updates slot 3 and 4, TL1 updates 5, 6, 7.
1053 // Set the references via the svc_ref_frame_config control.
1054 // Always reference LAST.
1055 ref_frame_config->reference[SVC_LAST_FRAME] = 1;
1056 if (superframe_cnt % 4 == 0) {
1057 // Base temporal layer.
1058 layer_id->temporal_layer_id = 0;
1059 if (layer_id->spatial_layer_id == 0) {
1060 // Reference LAST, update LAST.
1061 // Set all buffer_idx to 0.
1062 for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1063 ref_frame_config->ref_idx[i] = 0;
1064 ref_frame_config->refresh[0] = 1;
1065 } else if (layer_id->spatial_layer_id == 1) {
1066 // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 1,
1067 // GOLDEN (and all other refs) to slot 0.
1068 // Update slot 1 (LAST).
1069 for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1070 ref_frame_config->ref_idx[i] = 0;
1071 ref_frame_config->ref_idx[SVC_LAST_FRAME] = 1;
1072 ref_frame_config->refresh[1] = 1;
1073 } else if (layer_id->spatial_layer_id == 2) {
1074 // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 2,
1075 // GOLDEN (and all other refs) to slot 1.
1076 // Update slot 2 (LAST).
1077 for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1078 ref_frame_config->ref_idx[i] = 1;
1079 ref_frame_config->ref_idx[SVC_LAST_FRAME] = 2;
1080 ref_frame_config->refresh[2] = 1;
1081 }
1082 } else if ((superframe_cnt - 1) % 4 == 0) {
1083 // First top temporal enhancement layer.
1084 layer_id->temporal_layer_id = 2;
1085 if (layer_id->spatial_layer_id == 0) {
1086 // Reference LAST (slot 0).
1087 // Set GOLDEN to slot 3 and update slot 3.
1088 // Set all other buffer_idx to slot 0.
1089 for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1090 ref_frame_config->ref_idx[i] = 0;
1091 ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = 3;
1092 ref_frame_config->refresh[3] = 1;
1093 } else if (layer_id->spatial_layer_id == 1) {
1094 // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 1,
1095 // GOLDEN (and all other refs) to slot 3.
1096 // Set LAST2 to slot 4 and Update slot 4.
1097 for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1098 ref_frame_config->ref_idx[i] = 3;
1099 ref_frame_config->ref_idx[SVC_LAST_FRAME] = 1;
1100 ref_frame_config->ref_idx[SVC_LAST2_FRAME] = 4;
1101 ref_frame_config->refresh[4] = 1;
1102 } else if (layer_id->spatial_layer_id == 2) {
1103 // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 2,
1104 // GOLDEN (and all other refs) to slot 4.
1105 // No update.
1106 for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1107 ref_frame_config->ref_idx[i] = 4;
1108 ref_frame_config->ref_idx[SVC_LAST_FRAME] = 2;
1109 }
1110 } else if ((superframe_cnt - 2) % 4 == 0) {
1111 // Middle temporal enhancement layer.
1112 layer_id->temporal_layer_id = 1;
1113 if (layer_id->spatial_layer_id == 0) {
1114 // Reference LAST.
1115 // Set all buffer_idx to 0.
1116 // Set GOLDEN to slot 5 and update slot 5.
1117 for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1118 ref_frame_config->ref_idx[i] = 0;
1119 ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = 5 - shift;
1120 ref_frame_config->refresh[5 - shift] = 1;
1121 } else if (layer_id->spatial_layer_id == 1) {
1122 // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 1,
1123 // GOLDEN (and all other refs) to slot 5.
1124 // Set LAST3 to slot 6 and update slot 6.
1125 for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1126 ref_frame_config->ref_idx[i] = 5 - shift;
1127 ref_frame_config->ref_idx[SVC_LAST_FRAME] = 1;
1128 ref_frame_config->ref_idx[SVC_LAST3_FRAME] = 6 - shift;
1129 ref_frame_config->refresh[6 - shift] = 1;
1130 } else if (layer_id->spatial_layer_id == 2) {
1131 // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 2,
1132 // GOLDEN (and all other refs) to slot 6.
1133 // Set LAST3 to slot 7 and update slot 7.
1134 for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1135 ref_frame_config->ref_idx[i] = 6 - shift;
1136 ref_frame_config->ref_idx[SVC_LAST_FRAME] = 2;
1137 ref_frame_config->ref_idx[SVC_LAST3_FRAME] = 7 - shift;
1138 ref_frame_config->refresh[7 - shift] = 1;
1139 }
1140 } else if ((superframe_cnt - 3) % 4 == 0) {
1141 // Second top temporal enhancement layer.
1142 layer_id->temporal_layer_id = 2;
1143 if (layer_id->spatial_layer_id == 0) {
1144 // Set LAST to slot 5 and reference LAST.
1145 // Set GOLDEN to slot 3 and update slot 3.
1146 // Set all other buffer_idx to 0.
1147 for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1148 ref_frame_config->ref_idx[i] = 0;
1149 ref_frame_config->ref_idx[SVC_LAST_FRAME] = 5 - shift;
1150 ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = 3;
1151 ref_frame_config->refresh[3] = 1;
1152 } else if (layer_id->spatial_layer_id == 1) {
1153 // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 6,
1154 // GOLDEN to slot 3. Set LAST2 to slot 4 and update slot 4.
1155 for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1156 ref_frame_config->ref_idx[i] = 0;
1157 ref_frame_config->ref_idx[SVC_LAST_FRAME] = 6 - shift;
1158 ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = 3;
1159 ref_frame_config->ref_idx[SVC_LAST2_FRAME] = 4;
1160 ref_frame_config->refresh[4] = 1;
1161 } else if (layer_id->spatial_layer_id == 2) {
1162 // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 7,
1163 // GOLDEN to slot 4. No update.
1164 for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1165 ref_frame_config->ref_idx[i] = 0;
1166 ref_frame_config->ref_idx[SVC_LAST_FRAME] = 7 - shift;
1167 ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = 4;
1168 }
1169 }
1170 break;
1171 case 11:
1172 // Simulcast mode for 3 spatial and 3 temporal layers.
1173 // No inter-layer prediction, only prediction is temporal and single
1174 // reference (LAST).
1175 // No overlap in buffer slots between spatial layers. So for example,
1176 // SL0 only uses slots 0 and 1.
1177 // SL1 only uses slots 2 and 3.
1178 // SL2 only uses slots 4 and 5.
1179 // All 7 references for each inter-frame must only access buffer slots
1180 // for that spatial layer.
1181 // On key (super)frames: SL1 and SL2 must have no references set
1182 // and must refresh all the slots for that layer only (so 2 and 3
1183 // for SL1, 4 and 5 for SL2). The base SL0 will be labelled internally
1184 // as a Key frame (refresh all slots). SL1/SL2 will be labelled
1185 // internally as Intra-only frames that allow that stream to be decoded.
1186 // These conditions will allow for each spatial stream to be
1187 // independently decodeable.
1188
1189 // Initialize all references to 0 (don't use reference).
1190 for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1191 ref_frame_config->reference[i] = 0;
1192 // Initialize as no refresh/update for all slots.
1193 for (i = 0; i < REF_FRAMES; i++) ref_frame_config->refresh[i] = 0;
1194 for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1195 ref_frame_config->ref_idx[i] = 0;
1196
1197 if (is_key_frame) {
1198 if (layer_id->spatial_layer_id == 0) {
1199 // Assign LAST/GOLDEN to slot 0/1.
1200 // Refresh slots 0 and 1 for SL0.
1201 // SL0: this will get set to KEY frame internally.
1202 ref_frame_config->ref_idx[SVC_LAST_FRAME] = 0;
1203 ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = 1;
1204 ref_frame_config->refresh[0] = 1;
1205 ref_frame_config->refresh[1] = 1;
1206 } else if (layer_id->spatial_layer_id == 1) {
1207 // Assign LAST/GOLDEN to slot 2/3.
1208 // Refresh slots 2 and 3 for SL1.
1209 // This will get set to Intra-only frame internally.
1210 ref_frame_config->ref_idx[SVC_LAST_FRAME] = 2;
1211 ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = 3;
1212 ref_frame_config->refresh[2] = 1;
1213 ref_frame_config->refresh[3] = 1;
1214 } else if (layer_id->spatial_layer_id == 2) {
1215 // Assign LAST/GOLDEN to slot 4/5.
1216 // Refresh slots 4 and 5 for SL2.
1217 // This will get set to Intra-only frame internally.
1218 ref_frame_config->ref_idx[SVC_LAST_FRAME] = 4;
1219 ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = 5;
1220 ref_frame_config->refresh[4] = 1;
1221 ref_frame_config->refresh[5] = 1;
1222 }
1223 } else if (superframe_cnt % 4 == 0) {
1224 // Base temporal layer: TL0
1225 layer_id->temporal_layer_id = 0;
1226 if (layer_id->spatial_layer_id == 0) { // SL0
1227 // Reference LAST. Assign all references to either slot
1228 // 0 or 1. Here we assign LAST to slot 0, all others to 1.
1229 // Update slot 0 (LAST).
1230 ref_frame_config->reference[SVC_LAST_FRAME] = 1;
1231 for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1232 ref_frame_config->ref_idx[i] = 1;
1233 ref_frame_config->ref_idx[SVC_LAST_FRAME] = 0;
1234 ref_frame_config->refresh[0] = 1;
1235 } else if (layer_id->spatial_layer_id == 1) { // SL1
1236 // Reference LAST. Assign all references to either slot
1237 // 2 or 3. Here we assign LAST to slot 2, all others to 3.
1238 // Update slot 2 (LAST).
1239 ref_frame_config->reference[SVC_LAST_FRAME] = 1;
1240 for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1241 ref_frame_config->ref_idx[i] = 3;
1242 ref_frame_config->ref_idx[SVC_LAST_FRAME] = 2;
1243 ref_frame_config->refresh[2] = 1;
1244 } else if (layer_id->spatial_layer_id == 2) { // SL2
1245 // Reference LAST. Assign all references to either slot
1246 // 4 or 5. Here we assign LAST to slot 4, all others to 5.
1247 // Update slot 4 (LAST).
1248 ref_frame_config->reference[SVC_LAST_FRAME] = 1;
1249 for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1250 ref_frame_config->ref_idx[i] = 5;
1251 ref_frame_config->ref_idx[SVC_LAST_FRAME] = 4;
1252 ref_frame_config->refresh[4] = 1;
1253 }
1254 } else if ((superframe_cnt - 1) % 4 == 0) {
1255 // First top temporal enhancement layer: TL2
1256 layer_id->temporal_layer_id = 2;
1257 if (layer_id->spatial_layer_id == 0) { // SL0
1258 // Reference LAST (slot 0). Assign other references to slot 1.
1259 // No update/refresh on any slots.
1260 ref_frame_config->reference[SVC_LAST_FRAME] = 1;
1261 for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1262 ref_frame_config->ref_idx[i] = 1;
1263 ref_frame_config->ref_idx[SVC_LAST_FRAME] = 0;
1264 } else if (layer_id->spatial_layer_id == 1) { // SL1
1265 // Reference LAST (slot 2). Assign other references to slot 3.
1266 // No update/refresh on any slots.
1267 ref_frame_config->reference[SVC_LAST_FRAME] = 1;
1268 for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1269 ref_frame_config->ref_idx[i] = 3;
1270 ref_frame_config->ref_idx[SVC_LAST_FRAME] = 2;
1271 } else if (layer_id->spatial_layer_id == 2) { // SL2
1272 // Reference LAST (slot 4). Assign other references to slot 5.
1273 // No update/refresh on any slots.
1274 ref_frame_config->reference[SVC_LAST_FRAME] = 1;
1275 for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1276 ref_frame_config->ref_idx[i] = 5;
1277 ref_frame_config->ref_idx[SVC_LAST_FRAME] = 4;
1278 }
1279 } else if ((superframe_cnt - 2) % 4 == 0) {
1280 // Middle temporal enhancement layer: TL1
1281 layer_id->temporal_layer_id = 1;
1282 if (layer_id->spatial_layer_id == 0) { // SL0
1283 // Reference LAST (slot 0).
1284 // Set GOLDEN to slot 1 and update slot 1.
1285 // This will be used as reference for next TL2.
1286 ref_frame_config->reference[SVC_LAST_FRAME] = 1;
1287 for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1288 ref_frame_config->ref_idx[i] = 1;
1289 ref_frame_config->ref_idx[SVC_LAST_FRAME] = 0;
1290 ref_frame_config->refresh[1] = 1;
1291 } else if (layer_id->spatial_layer_id == 1) { // SL1
1292 // Reference LAST (slot 2).
1293 // Set GOLDEN to slot 3 and update slot 3.
1294 // This will be used as reference for next TL2.
1295 ref_frame_config->reference[SVC_LAST_FRAME] = 1;
1296 for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1297 ref_frame_config->ref_idx[i] = 3;
1298 ref_frame_config->ref_idx[SVC_LAST_FRAME] = 2;
1299 ref_frame_config->refresh[3] = 1;
1300 } else if (layer_id->spatial_layer_id == 2) { // SL2
1301 // Reference LAST (slot 4).
1302 // Set GOLDEN to slot 5 and update slot 5.
1303 // This will be used as reference for next TL2.
1304 ref_frame_config->reference[SVC_LAST_FRAME] = 1;
1305 for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1306 ref_frame_config->ref_idx[i] = 5;
1307 ref_frame_config->ref_idx[SVC_LAST_FRAME] = 4;
1308 ref_frame_config->refresh[5] = 1;
1309 }
1310 } else if ((superframe_cnt - 3) % 4 == 0) {
1311 // Second top temporal enhancement layer: TL2
1312 layer_id->temporal_layer_id = 2;
1313 if (layer_id->spatial_layer_id == 0) { // SL0
1314 // Reference LAST (slot 1). Assign other references to slot 0.
1315 // No update/refresh on any slots.
1316 ref_frame_config->reference[SVC_LAST_FRAME] = 1;
1317 for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1318 ref_frame_config->ref_idx[i] = 0;
1319 ref_frame_config->ref_idx[SVC_LAST_FRAME] = 1;
1320 } else if (layer_id->spatial_layer_id == 1) { // SL1
1321 // Reference LAST (slot 3). Assign other references to slot 2.
1322 // No update/refresh on any slots.
1323 ref_frame_config->reference[SVC_LAST_FRAME] = 1;
1324 for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1325 ref_frame_config->ref_idx[i] = 2;
1326 ref_frame_config->ref_idx[SVC_LAST_FRAME] = 3;
1327 } else if (layer_id->spatial_layer_id == 2) { // SL2
1328 // Reference LAST (slot 5). Assign other references to slot 4.
1329 // No update/refresh on any slots.
1330 ref_frame_config->reference[SVC_LAST_FRAME] = 1;
1331 for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1332 ref_frame_config->ref_idx[i] = 4;
1333 ref_frame_config->ref_idx[SVC_LAST_FRAME] = 5;
1334 }
1335 }
1336 if (!simulcast_mode && layer_id->spatial_layer_id > 0) {
1337 // Always reference GOLDEN (inter-layer prediction).
1338 ref_frame_config->reference[SVC_GOLDEN_FRAME] = 1;
1339 if (ksvc_mode) {
1340 // KSVC: only keep the inter-layer reference (GOLDEN) for
1341 // superframes whose base is key.
1342 if (!is_key_frame) ref_frame_config->reference[SVC_GOLDEN_FRAME] = 0;
1343 }
1344 if (is_key_frame && layer_id->spatial_layer_id > 1) {
1345 // On superframes whose base is key: remove LAST to avoid prediction
1346 // off layer two levels below.
1347 ref_frame_config->reference[SVC_LAST_FRAME] = 0;
1348 }
1349 }
1350 // For 3 spatial layer case 8 (where there is free buffer slot):
1351 // allow for top spatial layer to use additional temporal reference.
1352 // Additional reference is only updated on base temporal layer, every
1353 // 10 TL0 frames here.
1354 if (!simulcast_mode && enable_longterm_temporal_ref &&
1355 layer_id->spatial_layer_id == 2 && layering_mode == 8) {
1356 ref_frame_config->ref_idx[SVC_ALTREF_FRAME] = REF_FRAMES - 1;
1357 if (!is_key_frame) ref_frame_config->reference[SVC_ALTREF_FRAME] = 1;
1358 if (base_count % 10 == 0 && layer_id->temporal_layer_id == 0)
1359 ref_frame_config->refresh[REF_FRAMES - 1] = 1;
1360 }
1361 break;
1362 default: assert(0); die("Error: Unsupported temporal layering mode!\n");
1363 }
1364}
1365
1366static void write_literal(struct aom_write_bit_buffer *wb, int data, int bits,
1367 int offset = 0) {
1368 const int to_write = data - offset;
1369 if (to_write < 0 || to_write >= (1 << bits)) {
1370 die("Invalid data, value %d out of range [%d, %d]\n", data, offset,
1371 offset + (1 << bits) - 1);
1372 }
1373 aom_wb_write_literal(wb, to_write, bits);
1374}
1375
1376static void write_depth_representation_element(
1377 struct aom_write_bit_buffer *buffer,
1378 const std::pair<libaom_examples::DepthRepresentationElement, bool>
1379 &element) {
1380 if (!element.second) {
1381 return;
1382 }
1383 write_literal(buffer, element.first.sign_flag, 1);
1384 write_literal(buffer, element.first.exponent, 7);
1385 int mantissa_len = 1;
1386 while (mantissa_len < 32 && (element.first.mantissa >> mantissa_len != 0)) {
1387 ++mantissa_len;
1388 }
1389 write_literal(buffer, mantissa_len - 1, 5);
1390 write_literal(buffer, element.first.mantissa, mantissa_len);
1391}
1392
1393static void write_color_properties(
1394 struct aom_write_bit_buffer *buffer,
1395 const std::pair<libaom_examples::ColorProperties, bool> &color_properties) {
1396 write_literal(buffer, color_properties.second, 1);
1397 if (color_properties.second) {
1398 write_literal(buffer, color_properties.first.color_range, 1);
1399 write_literal(buffer, color_properties.first.color_primaries, 8);
1400 write_literal(buffer, color_properties.first.transfer_characteristics, 8);
1401 write_literal(buffer, color_properties.first.matrix_coefficients, 8);
1402 } else {
1403 write_literal(buffer, 0, 1); // reserved_1bit
1404 }
1405}
1406
1407static void add_multilayer_metadata(
1408 aom_image_t *frame, const libaom_examples::MultilayerMetadata &multilayer) {
1409 // Pretty large buffer to accommodate the largest multilayer metadata
1410 // possible, with 4 alpha segmentation layers (each can be up to about 66kB).
1411 std::vector<uint8_t> data(66000 * multilayer.layers.size());
1412 struct aom_write_bit_buffer buffer = { data.data(), 0 };
1413
1414 write_literal(&buffer, multilayer.use_case, 6);
1415 if (multilayer.layers.empty()) {
1416 die("Invalid multilayer metadata, no layers found\n");
1417 } else if (multilayer.layers.size() > MAX_NUM_SPATIAL_LAYERS) {
1418 die("Invalid multilayer metadata, too many layers (max is %d)\n",
1419 MAX_NUM_SPATIAL_LAYERS);
1420 }
1421 write_literal(&buffer, (int)multilayer.layers.size() - 1, 2);
1422 assert(buffer.bit_offset % 8 == 0);
1423 for (size_t i = 0; i < multilayer.layers.size(); ++i) {
1424 const libaom_examples::LayerMetadata &layer = multilayer.layers[i];
1425 // Alpha info with segmentation with labels can be up to about 66k bytes,
1426 // which requires 3 bytes to encode in leb128.
1427 const int bytes_reserved_for_size = 3;
1428 // Placeholder for layer_metadata_size which will be written later.
1429 write_literal(&buffer, 0, bytes_reserved_for_size * 8);
1430 const uint32_t metadata_start = buffer.bit_offset;
1431 write_literal(&buffer, (int)i, 2); // ml_spatial_id
1432 write_literal(&buffer, layer.layer_type, 5);
1433 write_literal(&buffer, layer.luma_plane_only_flag, 1);
1434 write_literal(&buffer, layer.layer_view_type, 3);
1435 write_literal(&buffer, layer.group_id, 2);
1436 write_literal(&buffer, layer.layer_dependency_idc, 3);
1437 write_literal(&buffer, layer.layer_metadata_scope, 2);
1438 write_literal(&buffer, 0, 4); // ml_reserved_4bits
1439
1440 if (i > 0) {
1441 write_color_properties(&buffer, layer.layer_color_description);
1442 } else {
1443 write_literal(&buffer, 0, 2); // ml_reserved_2bits
1444 }
1445 assert(buffer.bit_offset % 8 == 0);
1446
1447 if (multilayer.use_case < 12) {
1448 if (layer.layer_type == libaom_examples::MULTIALYER_LAYER_TYPE_ALPHA &&
1449 layer.layer_metadata_scope >= libaom_examples::SCOPE_GLOBAL) {
1450 const libaom_examples::AlphaInformation &alpha_info =
1451 layer.global_alpha_info;
1452 write_literal(&buffer, alpha_info.alpha_use_idc, 3);
1453 write_literal(&buffer, alpha_info.alpha_bit_depth, 3, /*offset=*/8);
1454 write_literal(&buffer, alpha_info.alpha_clip_idc, 2);
1455 write_literal(&buffer, alpha_info.alpha_incr_flag, 1);
1456 write_literal(&buffer, alpha_info.alpha_transparent_value,
1457 alpha_info.alpha_bit_depth);
1458 write_literal(&buffer, alpha_info.alpha_opaque_value,
1459 alpha_info.alpha_bit_depth);
1460 if (buffer.bit_offset % 8 != 0) {
1461 // ai_byte_alignment_bits
1462 write_literal(&buffer, 0, 8 - (buffer.bit_offset % 8));
1463 }
1464 assert(buffer.bit_offset % 8 == 0);
1465
1466 if (alpha_info.alpha_use_idc == libaom_examples::ALPHA_STRAIGHT) {
1467 write_literal(&buffer, 0, 6); // ai_reserved_6bits
1468 write_color_properties(&buffer, alpha_info.alpha_color_description);
1469 } else if (alpha_info.alpha_use_idc ==
1470 libaom_examples::ALPHA_SEGMENTATION) {
1471 write_literal(&buffer, 0, 7); // ai_reserved_7bits
1472 write_literal(&buffer, !alpha_info.label_type_id.empty(), 1);
1473 if (!alpha_info.label_type_id.empty()) {
1474 const size_t num_values =
1475 std::abs(alpha_info.alpha_transparent_value -
1476 alpha_info.alpha_opaque_value) +
1477 1;
1478 if (!alpha_info.label_type_id.empty() &&
1479 alpha_info.label_type_id.size() != num_values) {
1480 die("Invalid multilayer metadata, label_type_id size must be "
1481 "equal to the range of alpha values between "
1482 "alpha_transparent_value and alpha_opaque_value (expected "
1483 "%d values, found %d values)\n",
1484 (int)num_values, (int)alpha_info.label_type_id.size());
1485 }
1486 for (size_t j = 0; j < num_values; ++j) {
1487 write_literal(&buffer, alpha_info.label_type_id[j], 16);
1488 }
1489 }
1490 }
1491 assert(buffer.bit_offset % 8 == 0);
1492 } else if (layer.layer_type ==
1493 libaom_examples::MULTIALYER_LAYER_TYPE_DEPTH &&
1494 layer.layer_metadata_scope >= libaom_examples::SCOPE_GLOBAL) {
1495 const libaom_examples::DepthInformation &depth_info =
1496 layer.global_depth_info;
1497 write_literal(&buffer, depth_info.z_near.second, 1);
1498 write_literal(&buffer, depth_info.z_far.second, 1);
1499 write_literal(&buffer, depth_info.d_min.second, 1);
1500 write_literal(&buffer, depth_info.d_max.second, 1);
1501 write_literal(&buffer, depth_info.depth_representation_type, 4);
1502 if (depth_info.d_min.second || depth_info.d_max.second) {
1503 write_literal(&buffer, depth_info.disparity_ref_view_id, 2);
1504 }
1505 write_depth_representation_element(&buffer, depth_info.z_near);
1506 write_depth_representation_element(&buffer, depth_info.z_far);
1507 write_depth_representation_element(&buffer, depth_info.d_min);
1508 write_depth_representation_element(&buffer, depth_info.d_max);
1509 if (depth_info.depth_representation_type == 3) {
1510 write_literal(&buffer, depth_info.depth_nonlinear_precision, 4,
1511 /*offset=*/8);
1512 if (depth_info.depth_nonlinear_representation_model.empty() ||
1513 depth_info.depth_nonlinear_representation_model.size() >
1514 (1 << 6)) {
1515 die("Invalid multilayer metadata, if depth_nonlinear_precision "
1516 "== 3, depth_nonlinear_representation_model must have 1 to "
1517 "%d elements, found %d elements\n",
1518 1 << 6,
1519 (int)depth_info.depth_nonlinear_representation_model.size());
1520 }
1521 write_literal(
1522 &buffer,
1523 (int)depth_info.depth_nonlinear_representation_model.size() - 1,
1524 6);
1525 const int bit_depth =
1526 depth_info.depth_nonlinear_precision + 8; // XXX + 9 ???
1527 for (const uint32_t v :
1528 depth_info.depth_nonlinear_representation_model) {
1529 write_literal(&buffer, v, bit_depth);
1530 }
1531 }
1532 if (buffer.bit_offset % 8 != 0) {
1533 write_literal(&buffer, 0, 8 - (buffer.bit_offset % 8));
1534 }
1535 assert(buffer.bit_offset % 8 == 0);
1536 }
1537 }
1538 assert(buffer.bit_offset % 8 == 0);
1539
1540 const int metadata_size_bytes = (buffer.bit_offset - metadata_start) / 8;
1541 const uint8_t size_pos = metadata_start / 8 - bytes_reserved_for_size;
1542 size_t coded_size;
1543 if (aom_uleb_encode_fixed_size(metadata_size_bytes, bytes_reserved_for_size,
1544 bytes_reserved_for_size,
1545 &buffer.bit_buffer[size_pos], &coded_size)) {
1546 // Need to increase bytes_reserved_for_size in the code above.
1547 die("Error: Failed to write metadata size\n");
1548 }
1549 }
1550 assert(buffer.bit_offset % 8 == 0);
1551 if (aom_img_add_metadata(frame, 33 /*METADATA_TYPE_MULTILAYER*/,
1552 buffer.bit_buffer, buffer.bit_offset / 8,
1554 die("Error: Failed to add metadata\n");
1555 }
1556}
1557
1558#if CONFIG_AV1_DECODER
1559// Returns whether there is a mismatch between the encoder's new frame and the
1560// decoder's new frame.
// Returns 1 when the two images differ and 0 otherwise. On a mismatch the
// positions and values reported by the mismatch finder are printed to stderr
// together with `frames_out`. Both images are freed before returning.
// NOTE(review): enc_img and dec_img are declared below, but the statements
// that fill them are not visible in this listing (the embedded line numbers
// skip right after the "Get the internal new frame" comment). The same holds
// for the start of the two calls that take &enc_hbd_img / &dec_hbd_img as
// their first argument. Confirm against the full source.
1561static int test_decode(aom_codec_ctx_t *encoder, aom_codec_ctx_t *decoder,
1562 const int frames_out) {
1563 aom_image_t enc_img, dec_img;
1564 int mismatch = 0;
1565
1566 /* Get the internal new frame */
1569
1570#if CONFIG_AV1_HIGHBITDEPTH
// Only one of the two images is high bitdepth: convert that one to 8 bits so
// both sides can be compared in the same format.
1571 if ((enc_img.fmt & AOM_IMG_FMT_HIGHBITDEPTH) !=
1572 (dec_img.fmt & AOM_IMG_FMT_HIGHBITDEPTH)) {
1573 if (enc_img.fmt & AOM_IMG_FMT_HIGHBITDEPTH) {
1574 aom_image_t enc_hbd_img;
1576 &enc_hbd_img,
1577 static_cast<aom_img_fmt_t>(enc_img.fmt - AOM_IMG_FMT_HIGHBITDEPTH),
1578 enc_img.d_w, enc_img.d_h, 16);
1579 aom_img_truncate_16_to_8(&enc_hbd_img, &enc_img);
1580 enc_img = enc_hbd_img;
1581 }
1582 if (dec_img.fmt & AOM_IMG_FMT_HIGHBITDEPTH) {
1583 aom_image_t dec_hbd_img;
1585 &dec_hbd_img,
1586 static_cast<aom_img_fmt_t>(dec_img.fmt - AOM_IMG_FMT_HIGHBITDEPTH),
1587 dec_img.d_w, dec_img.d_h, 16);
1588 aom_img_truncate_16_to_8(&dec_hbd_img, &dec_img);
1589 dec_img = dec_hbd_img;
1590 }
1591 }
1592#endif
1593
1594 if (!aom_compare_img(&enc_img, &dec_img)) {
// y, u and v each receive four ints from the mismatch finder; they are
// printed below as "[a, b] {c/d}" per plane.
1595 int y[4], u[4], v[4];
1596#if CONFIG_AV1_HIGHBITDEPTH
1597 if (enc_img.fmt & AOM_IMG_FMT_HIGHBITDEPTH) {
1598 aom_find_mismatch_high(&enc_img, &dec_img, y, u, v);
1599 } else {
1600 aom_find_mismatch(&enc_img, &dec_img, y, u, v);
1601 }
1602#else
1603 aom_find_mismatch(&enc_img, &dec_img, y, u, v);
1604#endif
1605 fprintf(stderr,
1606 "Encode/decode mismatch on frame %d at"
1607 " Y[%d, %d] {%d/%d},"
1608 " U[%d, %d] {%d/%d},"
1609 " V[%d, %d] {%d/%d}\n",
1610 frames_out, y[0], y[1], y[2], y[3], u[0], u[1], u[2], u[3], v[0],
1611 v[1], v[2], v[3]);
1612 mismatch = 1;
1613 }
1614
1615 aom_img_free(&enc_img);
1616 aom_img_free(&dec_img);
1617 return mismatch;
1618}
1619#endif // CONFIG_AV1_DECODER
1620
// Running PSNR totals accumulated over the PSNR packets returned by the
// encoder. Every array has two slots; slot 1 is reserved for high bitdepth
// and the code in this file only touches slot 0.
struct psnr_stats {
  uint64_t psnr_sse_total[2];      // Sum of the packets' sse[0] values.
  uint64_t psnr_samples_total[2];  // Sum of the packets' samples[0] values.
  double psnr_totals[2][4];        // Per-index sums of the packets' psnr[].
  int psnr_count[2];               // Number of packets accumulated.
};
1628
1629static void show_psnr(struct psnr_stats *psnr_stream, double peak) {
1630 double ovpsnr;
1631
1632 if (!psnr_stream->psnr_count[0]) return;
1633
1634 fprintf(stderr, "\nPSNR (Overall/Avg/Y/U/V)");
1635 ovpsnr = sse_to_psnr((double)psnr_stream->psnr_samples_total[0], peak,
1636 (double)psnr_stream->psnr_sse_total[0]);
1637 fprintf(stderr, " %.3f", ovpsnr);
1638
1639 for (int i = 0; i < 4; i++) {
1640 fprintf(stderr, " %.3f",
1641 psnr_stream->psnr_totals[0][i] / psnr_stream->psnr_count[0]);
1642 }
1643 fprintf(stderr, "\n");
1644}
1645
1646static aom::AV1RateControlRtcConfig create_rtc_rc_config(
1647 const aom_codec_enc_cfg_t &cfg, const AppInput &app_input) {
1648 aom::AV1RateControlRtcConfig rc_cfg;
1649 rc_cfg.width = cfg.g_w;
1650 rc_cfg.height = cfg.g_h;
1651 rc_cfg.max_quantizer = cfg.rc_max_quantizer;
1652 rc_cfg.min_quantizer = cfg.rc_min_quantizer;
1653 rc_cfg.target_bandwidth = cfg.rc_target_bitrate;
1654 rc_cfg.buf_initial_sz = cfg.rc_buf_initial_sz;
1655 rc_cfg.buf_optimal_sz = cfg.rc_buf_optimal_sz;
1656 rc_cfg.buf_sz = cfg.rc_buf_sz;
1657 rc_cfg.overshoot_pct = cfg.rc_overshoot_pct;
1658 rc_cfg.undershoot_pct = cfg.rc_undershoot_pct;
1659 // This is hardcoded as AOME_SET_MAX_INTRA_BITRATE_PCT
1660 rc_cfg.max_intra_bitrate_pct = 300;
1661 rc_cfg.framerate = cfg.g_timebase.den;
1662 // TODO(jianj): Add suppor for SVC.
1663 rc_cfg.ss_number_layers = 1;
1664 rc_cfg.ts_number_layers = 1;
1665 rc_cfg.scaling_factor_num[0] = 1;
1666 rc_cfg.scaling_factor_den[0] = 1;
1667 rc_cfg.layer_target_bitrate[0] = static_cast<int>(rc_cfg.target_bandwidth);
1668 rc_cfg.max_quantizers[0] = rc_cfg.max_quantizer;
1669 rc_cfg.min_quantizers[0] = rc_cfg.min_quantizer;
1670 rc_cfg.aq_mode = app_input.aq_mode;
1671
1672 return rc_cfg;
1673}
1674
// Converts an internal qindex (0-255 range) to the external quantizer
// (0-63 range).
//
// Returns the smallest quantizer whose table entry is >= `qindex`. A `qindex`
// above the largest table entry yields the largest quantizer; a `qindex` at
// or below zero yields 0.
static int qindex_to_quantizer(int qindex) {
  // Table that converts 0-63 range Q values passed in outside to the 0-255
  // range Qindex used internally. It is searched in reverse here.
  static const int quantizer_to_qindex[] = {
    0,   4,   8,   12,  16,  20,  24,  28,  32,  36,  40,  44,  48,
    52,  56,  60,  64,  68,  72,  76,  80,  84,  88,  92,  96,  100,
    104, 108, 112, 116, 120, 124, 128, 132, 136, 140, 144, 148, 152,
    156, 160, 164, 168, 172, 176, 180, 184, 188, 192, 196, 200, 204,
    208, 212, 216, 220, 224, 228, 232, 236, 240, 244, 249, 255,
  };
  // Derive the bounds from the table itself so the loop limit and the
  // fallback value cannot drift from the table contents.
  const int num_quantizers =
      (int)(sizeof(quantizer_to_qindex) / sizeof(quantizer_to_qindex[0]));

  for (int quantizer = 0; quantizer < num_quantizers; ++quantizer) {
    if (quantizer_to_qindex[quantizer] >= qindex) return quantizer;
  }

  return num_quantizers - 1;
}
1690
1691static void set_active_map(const aom_codec_enc_cfg_t *cfg,
1692 aom_codec_ctx_t *codec, int frame_cnt) {
1693 aom_active_map_t map = { 0, 0, 0 };
1694
1695 map.rows = (cfg->g_h + 15) / 16;
1696 map.cols = (cfg->g_w + 15) / 16;
1697
1698 map.active_map = (uint8_t *)malloc(map.rows * map.cols);
1699 if (!map.active_map) die("Failed to allocate active map");
1700
1701 // Example map for testing.
1702 for (unsigned int i = 0; i < map.rows; ++i) {
1703 for (unsigned int j = 0; j < map.cols; ++j) {
1704 int index = map.cols * i + j;
1705 map.active_map[index] = 1;
1706 if (frame_cnt < 300) {
1707 if (i < map.rows / 2 && j < map.cols / 2) map.active_map[index] = 0;
1708 } else if (frame_cnt >= 300) {
1709 if (i < map.rows / 2 && j >= map.cols / 2) map.active_map[index] = 0;
1710 }
1711 }
1712 }
1713
1714 if (aom_codec_control(codec, AOME_SET_ACTIVEMAP, &map))
1715 die_codec(codec, "Failed to set active map");
1716
1717 free(map.active_map);
1718}
1719
// Program entry point. Parses the command line, configures the encoder and
// the SVC parameters, opens one output per (spatial, temporal) layer plus a
// combined output, then encodes the input frame by frame, looping over the
// spatial layers for each frame. Afterwards it prints rate-control, timing
// and (optionally) PSNR summaries, destroys the codec contexts and closes the
// video writers. Returns EXIT_SUCCESS; failures terminate through die(),
// die_codec() or fatal().
// NOTE(review): several statements are not visible in this listing (the
// embedded line numbers skip). In particular `cfg`, `encoder` and `res` are
// used below without a visible declaration, and a number of control calls
// show only their trailing arguments. Confirm against the full source.
1720int main(int argc, const char **argv) {
1721 AppInput app_input;
1722 AvxVideoWriter *outfile[AOM_MAX_LAYERS] = { NULL };
1723 FILE *obu_files[AOM_MAX_LAYERS] = { NULL };
1724 AvxVideoWriter *total_layer_file = NULL;
1725 FILE *total_layer_obu_file = NULL;
1727 int frame_cnt = 0;
1728 aom_image_t raw;
1729 int frame_avail;
1730 int got_data = 0;
1731 int flags = 0;
1732 int i;
1733 int pts = 0; // PTS starts at 0.
1734 int frame_duration = 1; // 1 timebase tick per frame.
1735 aom_svc_layer_id_t layer_id;
1736 aom_svc_params_t svc_params;
1737 aom_svc_ref_frame_config_t ref_frame_config;
1738 aom_svc_ref_frame_comp_pred_t ref_frame_comp_pred;
1739
1740#if CONFIG_INTERNAL_STATS
1741 FILE *stats_file = fopen("opsnr.stt", "a");
1742 if (stats_file == NULL) {
1743 die("Cannot open opsnr.stt\n");
1744 }
1745#endif
1746#if CONFIG_AV1_DECODER
1747 aom_codec_ctx_t decoder;
1748#endif
1749
1750 struct RateControlMetrics rc;
1751 int64_t cx_time = 0;
1752 int64_t cx_time_layer[AOM_MAX_LAYERS]; // max number of layers.
1753 int frame_cnt_layer[AOM_MAX_LAYERS];
1754 double sum_bitrate = 0.0;
1755 double sum_bitrate2 = 0.0;
1756 double framerate = 30.0;
1757 int use_svc_control = 1;
1758 int set_err_resil_frame = 0;
1759 int test_changing_bitrate = 0;
1760 zero(rc.layer_target_bitrate);
1761 memset(&layer_id, 0, sizeof(aom_svc_layer_id_t));
1762 memset(&app_input, 0, sizeof(AppInput));
1763 memset(&svc_params, 0, sizeof(svc_params));
1764
1765 // Flag to test dynamic scaling of source frames for single
1766 // spatial stream, using the scaling_mode control.
1767 const int test_dynamic_scaling_single_layer = 0;
1768
1769 // Flag to test setting speed per layer.
1770 const int test_speed_per_layer = 0;
1771
1772 // Flag for testing active maps.
1773 const int test_active_maps = 0;
1774
1775 /* Setup default input stream settings */
1776 for (i = 0; i < MAX_NUM_SPATIAL_LAYERS; ++i) {
1777 app_input.input_ctx[i].framerate.numerator = 30;
1778 app_input.input_ctx[i].framerate.denominator = 1;
1779 app_input.input_ctx[i].only_i420 = 0;
1780 app_input.input_ctx[i].bit_depth = AOM_BITS_8;
1781 }
1782 app_input.speed = 7;
1783 exec_name = argv[0];
1784
1785 // start with default encoder configuration
1788 if (res != AOM_CODEC_OK) {
1789 die("Failed to get config: %s\n", aom_codec_err_to_string(res));
1790 }
1791
1792 // Real time parameters.
1794
1795 cfg.rc_end_usage = AOM_CBR;
1796 cfg.rc_min_quantizer = 2;
1797 cfg.rc_max_quantizer = 52;
1798 cfg.rc_undershoot_pct = 50;
1799 cfg.rc_overshoot_pct = 50;
1800 cfg.rc_buf_initial_sz = 600;
1801 cfg.rc_buf_optimal_sz = 600;
1802 cfg.rc_buf_sz = 1000;
1803 cfg.rc_resize_mode = 0; // Set to RESIZE_DYNAMIC for dynamic resize.
1804 cfg.g_lag_in_frames = 0;
1805 cfg.kf_mode = AOM_KF_AUTO;
1806 cfg.g_w = 0; // Force user to specify width and height for raw input.
1807 cfg.g_h = 0;
1808
1809 parse_command_line(argc, argv, &app_input, &svc_params, &cfg);
1810
1811 int ts_number_layers = svc_params.number_temporal_layers;
1812 int ss_number_layers = svc_params.number_spatial_layers;
1813
1814 unsigned int width = cfg.g_w;
1815 unsigned int height = cfg.g_h;
1816
1817 if (app_input.layering_mode >= 0) {
1818 if (ts_number_layers !=
1819 mode_to_num_temporal_layers[app_input.layering_mode] ||
1820 ss_number_layers !=
1821 mode_to_num_spatial_layers[app_input.layering_mode]) {
1822 die("Number of layers doesn't match layering mode.");
1823 }
1824 }
1825
// NOTE(review): input_ctx is declared with MAX_NUM_SPATIAL_LAYERS entries but
// this scan is bounded by AOM_MAX_LAYERS. If AOM_MAX_LAYERS is larger, the
// loop can index past the array when every entry is Y4M; the defaults loop
// above uses MAX_NUM_SPATIAL_LAYERS. Confirm the intended bound.
1826 bool has_non_y4m_input = false;
1827 for (i = 0; i < AOM_MAX_LAYERS; ++i) {
1828 if (app_input.input_ctx[i].file_type != FILE_TYPE_Y4M) {
1829 has_non_y4m_input = true;
1830 break;
1831 }
1832 }
1833 // Y4M reader has its own allocation.
1834 if (has_non_y4m_input) {
1835 if (!aom_img_alloc(&raw, AOM_IMG_FMT_I420, width, height, 32)) {
1836 die("Failed to allocate image (%dx%d)", width, height);
1837 }
1838 }
1839
1841
1842 memcpy(&rc.layer_target_bitrate[0], &svc_params.layer_target_bitrate[0],
1843 sizeof(svc_params.layer_target_bitrate));
1844
// The target bitrate of the top temporal layer of each spatial layer is
// summed and must equal the configured total target bitrate.
1845 unsigned int total_rate = 0;
1846 for (i = 0; i < ss_number_layers; i++) {
1847 total_rate +=
1848 svc_params
1849 .layer_target_bitrate[i * ts_number_layers + ts_number_layers - 1];
1850 }
1851 if (total_rate != cfg.rc_target_bitrate) {
1852 die("Incorrect total target bitrate, expected: %d", total_rate);
1853 }
1854
1855 svc_params.framerate_factor[0] = 1;
1856 if (ts_number_layers == 2) {
1857 svc_params.framerate_factor[0] = 2;
1858 svc_params.framerate_factor[1] = 1;
1859 } else if (ts_number_layers == 3) {
1860 svc_params.framerate_factor[0] = 4;
1861 svc_params.framerate_factor[1] = 2;
1862 svc_params.framerate_factor[2] = 1;
1863 }
1864
1865 libaom_examples::MultilayerMetadata multilayer_metadata;
1866 if (app_input.multilayer_metadata_file != NULL) {
1867 multilayer_metadata = libaom_examples::parse_multilayer_file(
1868 app_input.multilayer_metadata_file);
1869 libaom_examples::print_multilayer_metadata(multilayer_metadata);
1870 }
1871
// NOTE(review): if both timebase fields are integers this division truncates
// before the result is stored in the double `framerate`, and a zero numerator
// would divide by zero. Verify the field types and the accepted range.
1872 framerate = cfg.g_timebase.den / cfg.g_timebase.num;
1873 set_rate_control_metrics(&rc, framerate, ss_number_layers, ts_number_layers);
1874
1875 AvxVideoInfo info;
1876 info.codec_fourcc = get_fourcc_by_aom_encoder(encoder);
1877 info.frame_width = cfg.g_w;
1878 info.frame_height = cfg.g_h;
1879 info.time_base.numerator = cfg.g_timebase.num;
1880 info.time_base.denominator = cfg.g_timebase.den;
1881 // Open an output file for each stream.
1882 for (int sl = 0; sl < ss_number_layers; ++sl) {
1883 for (int tl = 0; tl < ts_number_layers; ++tl) {
1884 i = sl * ts_number_layers + tl;
1885 char file_name[PATH_MAX];
1886 snprintf(file_name, sizeof(file_name), "%s_%d.av1",
1887 app_input.output_filename, i);
1888 if (app_input.output_obu) {
1889 obu_files[i] = fopen(file_name, "wb");
1890 if (!obu_files[i]) die("Failed to open %s for writing", file_name);
1891 } else {
1892 outfile[i] = aom_video_writer_open(file_name, kContainerIVF, &info);
1893 if (!outfile[i]) die("Failed to open %s for writing", file_name);
1894 }
1895 }
1896 }
1897 if (app_input.output_obu) {
1898 total_layer_obu_file = fopen(app_input.output_filename, "wb");
1899 if (!total_layer_obu_file)
1900 die("Failed to open %s for writing", app_input.output_filename);
1901 } else {
1902 total_layer_file =
1903 aom_video_writer_open(app_input.output_filename, kContainerIVF, &info);
1904 if (!total_layer_file)
1905 die("Failed to open %s for writing", app_input.output_filename);
1906 }
1907
1908 // Initialize codec.
1909 aom_codec_ctx_t codec;
1910 aom_codec_flags_t flag = 0;
1912 flag |= app_input.show_psnr ? AOM_CODEC_USE_PSNR : 0;
1913 if (aom_codec_enc_init(&codec, encoder, &cfg, flag))
1914 die_codec(&codec, "Failed to initialize encoder");
1915
1916#if CONFIG_AV1_DECODER
1917 if (app_input.decode) {
1918 if (aom_codec_dec_init(&decoder, get_aom_decoder_by_index(0), NULL, 0))
1919 die_codec(&decoder, "Failed to initialize decoder");
1920 }
1921#endif
1922
1923 aom_codec_control(&codec, AOME_SET_CPUUSED, app_input.speed);
1924 aom_codec_control(&codec, AV1E_SET_AQ_MODE, app_input.aq_mode ? 3 : 0);
1939
1940 // Settings to reduce key frame encoding time.
1946
1948
1949 aom_codec_control(&codec, AV1E_SET_TUNE_CONTENT, app_input.tune_content);
1950 if (app_input.tune_content == AOM_CONTENT_SCREEN) {
1952 // INTRABC is currently disabled for rt mode, as it's too slow.
1954 }
1955
1956 if (app_input.use_external_rc) {
1958 }
1959
1961
1964
1966
1967 svc_params.number_spatial_layers = ss_number_layers;
1968 svc_params.number_temporal_layers = ts_number_layers;
1969 for (i = 0; i < ss_number_layers * ts_number_layers; ++i) {
1970 svc_params.max_quantizers[i] = cfg.rc_max_quantizer;
1971 svc_params.min_quantizers[i] = cfg.rc_min_quantizer;
1972 }
// Default scale factors, used only when none were given explicitly: 1/1 for
// every layer, then 1/2 for the base layer with two spatial layers, or 1/4
// and 1/2 for the two lowest layers with three spatial layers.
1973 if (!app_input.scale_factors_explicitly_set) {
1974 for (i = 0; i < ss_number_layers; ++i) {
1975 svc_params.scaling_factor_num[i] = 1;
1976 svc_params.scaling_factor_den[i] = 1;
1977 }
1978 if (ss_number_layers == 2) {
1979 svc_params.scaling_factor_num[0] = 1;
1980 svc_params.scaling_factor_den[0] = 2;
1981 } else if (ss_number_layers == 3) {
1982 svc_params.scaling_factor_num[0] = 1;
1983 svc_params.scaling_factor_den[0] = 4;
1984 svc_params.scaling_factor_num[1] = 1;
1985 svc_params.scaling_factor_den[1] = 2;
1986 }
1987 }
1988 aom_codec_control(&codec, AV1E_SET_SVC_PARAMS, &svc_params);
1989 // TODO(aomedia:3032): Configure KSVC in fixed mode.
1990
1991 // This controls the maximum target size of the key frame.
1992 // For generating smaller key frames, use a smaller max_intra_size_pct
1993 // value, like 100 or 200.
1994 {
1995 const int max_intra_size_pct = 300;
1997 max_intra_size_pct);
1998 }
1999
2000 for (int lx = 0; lx < ts_number_layers * ss_number_layers; lx++) {
2001 cx_time_layer[lx] = 0;
2002 frame_cnt_layer[lx] = 0;
2003 }
2004
2005 std::unique_ptr<aom::AV1RateControlRTC> rc_api;
2006 if (app_input.use_external_rc) {
2007 const aom::AV1RateControlRtcConfig rc_cfg =
2008 create_rtc_rc_config(cfg, app_input);
2009 rc_api = aom::AV1RateControlRTC::Create(rc_cfg);
2010 }
2011
2012 frame_avail = 1;
2013 struct psnr_stats psnr_stream;
2014 memset(&psnr_stream, 0, sizeof(psnr_stream));
// Main encode loop: keeps going while input frames remain or the previous
// encode call still produced data.
2015 while (frame_avail || got_data) {
2016 struct aom_usec_timer timer;
2017 frame_avail = read_frame(&(app_input.input_ctx[0]), &raw);
2018 // Loop over spatial layers.
2019 for (int slx = 0; slx < ss_number_layers; slx++) {
2020 if (slx > 0 && app_input.input_ctx[slx].filename != NULL) {
2021 const int previous_layer_frame_avail = frame_avail;
2022 frame_avail = read_frame(&(app_input.input_ctx[slx]), &raw);
2023 if (previous_layer_frame_avail != frame_avail) {
2024 die("Mismatch in number of frames between spatial layer input files");
2025 }
2026 }
2027
2028 aom_codec_iter_t iter = NULL;
2029 const aom_codec_cx_pkt_t *pkt;
2030 int layer = 0;
2031 // Flag for superframe whose base is key.
// NOTE(review): a kf_max_dist of 0 would make this modulo a division by
// zero; verify the value is always non-zero here.
2032 int is_key_frame = (frame_cnt % cfg.kf_max_dist) == 0;
2033 // For flexible mode:
2034 if (app_input.layering_mode >= 0) {
2035 // Set the reference/update flags, layer_id, and reference_map
2036 // buffer index.
2037 set_layer_pattern(app_input.layering_mode, frame_cnt, &layer_id,
2038 &ref_frame_config, &ref_frame_comp_pred,
2039 &use_svc_control, slx, is_key_frame,
2040 (app_input.layering_mode == 10), app_input.speed);
2041 aom_codec_control(&codec, AV1E_SET_SVC_LAYER_ID, &layer_id);
2042 if (use_svc_control) {
2044 &ref_frame_config);
2046 &ref_frame_comp_pred);
2047 }
2048 if (app_input.multilayer_metadata_file != NULL) {
2049 add_multilayer_metadata(&raw, multilayer_metadata);
2050 }
2051 // Set the speed per layer.
2052 if (test_speed_per_layer) {
2053 int speed_per_layer = 10;
2054 if (layer_id.spatial_layer_id == 0) {
2055 if (layer_id.temporal_layer_id == 0) speed_per_layer = 6;
2056 if (layer_id.temporal_layer_id == 1) speed_per_layer = 7;
2057 if (layer_id.temporal_layer_id == 2) speed_per_layer = 8;
2058 } else if (layer_id.spatial_layer_id == 1) {
2059 if (layer_id.temporal_layer_id == 0) speed_per_layer = 7;
2060 if (layer_id.temporal_layer_id == 1) speed_per_layer = 8;
2061 if (layer_id.temporal_layer_id == 2) speed_per_layer = 9;
2062 } else if (layer_id.spatial_layer_id == 2) {
2063 if (layer_id.temporal_layer_id == 0) speed_per_layer = 8;
2064 if (layer_id.temporal_layer_id == 1) speed_per_layer = 9;
2065 if (layer_id.temporal_layer_id == 2) speed_per_layer = 10;
2066 }
2067 aom_codec_control(&codec, AOME_SET_CPUUSED, speed_per_layer);
2068 }
2069 } else {
2070 // Only up to 3 temporal layers supported in fixed mode.
2071 // Only need to set spatial and temporal layer_id: reference
2072 // prediction, refresh, and buffer_idx are set internally.
2073 layer_id.spatial_layer_id = slx;
2074 layer_id.temporal_layer_id = 0;
2075 if (ts_number_layers == 2) {
2076 layer_id.temporal_layer_id = (frame_cnt % 2) != 0;
2077 } else if (ts_number_layers == 3) {
2078 if (frame_cnt % 2 != 0)
2079 layer_id.temporal_layer_id = 2;
2080 else if ((frame_cnt > 1) && ((frame_cnt - 2) % 4 == 0))
2081 layer_id.temporal_layer_id = 1;
2082 }
2083 aom_codec_control(&codec, AV1E_SET_SVC_LAYER_ID, &layer_id);
2084 }
2085
2086 if (set_err_resil_frame && cfg.g_error_resilient == 0) {
2087 // Set error_resilient per frame: off/0 for base layer and
2088 // on/1 for enhancement layer frames.
2089 // Note that this can only be done on the fly/per-frame/layer
2090 // if the config error_resilience is off/0. See the logic for updating
2091 // in set_encoder_config():
2092 // tool_cfg->error_resilient_mode =
2093 // cfg->g_error_resilient | extra_cfg->error_resilient_mode;
2094 const int err_resil_mode =
2095 layer_id.spatial_layer_id > 0 || layer_id.temporal_layer_id > 0;
2097 err_resil_mode);
2098 }
2099
// Flat index of the current (spatial, temporal) layer; used for the
// per-layer timing and frame counters below.
2100 layer = slx * ts_number_layers + layer_id.temporal_layer_id;
2101 if (frame_avail && slx == 0) ++rc.layer_input_frames[layer];
2102
2103 if (test_dynamic_scaling_single_layer) {
2104 // Example to scale source down by 2x2, then 4x4, and then back up to
2105 // 2x2, and then back to original.
2106 int frame_2x2 = 200;
2107 int frame_4x4 = 400;
2108 int frame_2x2up = 600;
2109 int frame_orig = 800;
2110 if (frame_cnt >= frame_2x2 && frame_cnt < frame_4x4) {
2111 // Scale source down by 2x2.
2112 struct aom_scaling_mode mode = { AOME_ONETWO, AOME_ONETWO };
2113 aom_codec_control(&codec, AOME_SET_SCALEMODE, &mode);
2114 } else if (frame_cnt >= frame_4x4 && frame_cnt < frame_2x2up) {
2115 // Scale source down by 4x4.
2116 struct aom_scaling_mode mode = { AOME_ONEFOUR, AOME_ONEFOUR };
2117 aom_codec_control(&codec, AOME_SET_SCALEMODE, &mode);
2118 } else if (frame_cnt >= frame_2x2up && frame_cnt < frame_orig) {
2119 // Source back up to 2x2.
2120 struct aom_scaling_mode mode = { AOME_ONETWO, AOME_ONETWO };
2121 aom_codec_control(&codec, AOME_SET_SCALEMODE, &mode);
2122 } else if (frame_cnt >= frame_orig) {
2123 // Source back up to original resolution (no scaling).
2124 struct aom_scaling_mode mode = { AOME_NORMAL, AOME_NORMAL };
2125 aom_codec_control(&codec, AOME_SET_SCALEMODE, &mode);
2126 }
2127 if (frame_cnt == frame_2x2 || frame_cnt == frame_4x4 ||
2128 frame_cnt == frame_2x2up || frame_cnt == frame_orig) {
2129 // For dynamic resize testing on single layer: refresh all references
2130 // on the resized frame: this is to avoid decode error:
2131 // if resize goes down by >= 4x4 then libaom decoder will throw an
2132 // error that some reference (even though not used) is beyond the
2133 // limit size (must be smaller than 4x4).
2134 for (i = 0; i < REF_FRAMES; i++) ref_frame_config.refresh[i] = 1;
2135 if (use_svc_control) {
2137 &ref_frame_config);
2139 &ref_frame_comp_pred);
2140 }
2141 }
2142 }
2143
2144 // Change target_bitrate every other frame.
2145 if (test_changing_bitrate && frame_cnt % 2 == 0) {
2146 if (frame_cnt < 500)
2147 cfg.rc_target_bitrate += 10;
2148 else
2149 cfg.rc_target_bitrate -= 10;
2150 // Do big increase and decrease.
2151 if (frame_cnt == 100) cfg.rc_target_bitrate <<= 1;
2152 if (frame_cnt == 600) cfg.rc_target_bitrate >>= 1;
2153 if (cfg.rc_target_bitrate < 100) cfg.rc_target_bitrate = 100;
2154 // Call change_config, or bypass with new control.
2155 // res = aom_codec_enc_config_set(&codec, &cfg);
2157 cfg.rc_target_bitrate))
2158 die_codec(&codec, "Failed to SET_BITRATE_ONE_PASS_CBR");
2159 }
2160
// External rate control: ask the rate controller for a QP for this frame and
// pass it to the encoder after converting it with qindex_to_quantizer().
2161 if (rc_api) {
2162 aom::AV1FrameParamsRTC frame_params;
2163 // TODO(jianj): Add support for SVC.
2164 frame_params.spatial_layer_id = 0;
2165 frame_params.temporal_layer_id = 0;
2166 frame_params.frame_type =
2167 is_key_frame ? aom::kKeyFrame : aom::kInterFrame;
2168 rc_api->ComputeQP(frame_params);
2169 const int current_qp = rc_api->GetQP();
2171 qindex_to_quantizer(current_qp))) {
2172 die_codec(&codec, "Failed to SET_QUANTIZER_ONE_PASS");
2173 }
2174 }
2175
2176 if (test_active_maps) set_active_map(&cfg, &codec, frame_cnt);
2177
2178 // Do the layer encode.
2179 aom_usec_timer_start(&timer);
2180 if (aom_codec_encode(&codec, frame_avail ? &raw : NULL, pts, 1, flags))
2181 die_codec(&codec, "Failed to encode frame");
2182 aom_usec_timer_mark(&timer);
2183 cx_time += aom_usec_timer_elapsed(&timer);
2184 cx_time_layer[layer] += aom_usec_timer_elapsed(&timer);
2185 frame_cnt_layer[layer] += 1;
2186
2187 // Get the high motion content flag.
2188 int content_flag = 0;
2190 &content_flag)) {
2191 die_codec(&codec, "Failed to GET_HIGH_MOTION_CONTENT_SCREEN_RTC");
2192 }
2193
2194 got_data = 0;
2195 // For simulcast (mode 11): write out each spatial layer to the file.
2196 int ss_layers_write = (app_input.layering_mode == 11)
2197 ? layer_id.spatial_layer_id + 1
2198 : ss_number_layers;
2199 while ((pkt = aom_codec_get_cx_data(&codec, &iter))) {
2200 switch (pkt->kind) {
// NOTE(review): the case label for the frame-packet branch is not visible in
// this listing; the statements up to the first `break` handle compressed
// frame data (they read pkt->data.frame).
2202 for (int sl = layer_id.spatial_layer_id; sl < ss_layers_write;
2203 ++sl) {
2204 for (int tl = layer_id.temporal_layer_id; tl < ts_number_layers;
2205 ++tl) {
2206 int j = sl * ts_number_layers + tl;
2207 if (app_input.output_obu) {
2208 fwrite(pkt->data.frame.buf, 1, pkt->data.frame.sz,
2209 obu_files[j]);
2210 } else {
2211 aom_video_writer_write_frame(
2212 outfile[j],
2213 reinterpret_cast<const uint8_t *>(pkt->data.frame.buf),
2214 pkt->data.frame.sz, pts);
2215 }
2216 if (sl == layer_id.spatial_layer_id)
2217 rc.layer_encoding_bitrate[j] += 8.0 * pkt->data.frame.sz;
2218 }
2219 }
2220 got_data = 1;
2221 // Write everything into the top layer.
2222 if (app_input.output_obu) {
2223 fwrite(pkt->data.frame.buf, 1, pkt->data.frame.sz,
2224 total_layer_obu_file);
2225 } else {
2226 aom_video_writer_write_frame(
2227 total_layer_file,
2228 reinterpret_cast<const uint8_t *>(pkt->data.frame.buf),
2229 pkt->data.frame.sz, pts);
2230 }
2231 // Keep count of rate control stats per layer (for non-key).
2232 if (!(pkt->data.frame.flags & AOM_FRAME_IS_KEY)) {
2233 int j = layer_id.spatial_layer_id * ts_number_layers +
2234 layer_id.temporal_layer_id;
2235 assert(j >= 0);
2236 rc.layer_avg_frame_size[j] += 8.0 * pkt->data.frame.sz;
2237 rc.layer_avg_rate_mismatch[j] +=
2238 fabs(8.0 * pkt->data.frame.sz - rc.layer_pfb[j]) /
2239 rc.layer_pfb[j];
2240 if (slx == 0) ++rc.layer_enc_frames[layer_id.temporal_layer_id];
2241 }
2242
2243 if (rc_api) {
2244 rc_api->PostEncodeUpdate(pkt->data.frame.sz);
2245 }
2246 // Update for short-time encoding bitrate states, for moving window
2247 // of size rc->window, shifted by rc->window / 2.
2248 // Ignore first window segment, due to key frame.
2249 // For spatial layers: only do this for top/highest SL.
2250 if (frame_cnt > rc.window_size && slx == ss_number_layers - 1) {
2251 sum_bitrate += 0.001 * 8.0 * pkt->data.frame.sz * framerate;
2252 rc.window_size = (rc.window_size <= 0) ? 1 : rc.window_size;
2253 if (frame_cnt % rc.window_size == 0) {
2254 rc.window_count += 1;
2255 rc.avg_st_encoding_bitrate += sum_bitrate / rc.window_size;
2256 rc.variance_st_encoding_bitrate +=
2257 (sum_bitrate / rc.window_size) *
2258 (sum_bitrate / rc.window_size);
2259 sum_bitrate = 0.0;
2260 }
2261 }
2262 // Second shifted window.
2263 if (frame_cnt > rc.window_size + rc.window_size / 2 &&
2264 slx == ss_number_layers - 1) {
2265 sum_bitrate2 += 0.001 * 8.0 * pkt->data.frame.sz * framerate;
2266 if (frame_cnt > 2 * rc.window_size &&
2267 frame_cnt % rc.window_size == 0) {
2268 rc.window_count += 1;
2269 rc.avg_st_encoding_bitrate += sum_bitrate2 / rc.window_size;
2270 rc.variance_st_encoding_bitrate +=
2271 (sum_bitrate2 / rc.window_size) *
2272 (sum_bitrate2 / rc.window_size);
2273 sum_bitrate2 = 0.0;
2274 }
2275 }
2276
2277#if CONFIG_AV1_DECODER
2278 if (app_input.decode) {
2279 if (aom_codec_decode(
2280 &decoder,
2281 reinterpret_cast<const uint8_t *>(pkt->data.frame.buf),
2282 pkt->data.frame.sz, NULL))
2283 die_codec(&decoder, "Failed to decode frame");
2284 }
2285#endif
2286
2287 break;
2288 case AOM_CODEC_PSNR_PKT:
2289 if (app_input.show_psnr) {
2290 psnr_stream.psnr_sse_total[0] += pkt->data.psnr.sse[0];
2291 psnr_stream.psnr_samples_total[0] += pkt->data.psnr.samples[0];
2292 for (int plane = 0; plane < 4; plane++) {
2293 psnr_stream.psnr_totals[0][plane] += pkt->data.psnr.psnr[plane];
2294 }
2295 psnr_stream.psnr_count[0]++;
2296 }
2297 break;
2298 default: break;
2299 }
2300 }
2301#if CONFIG_AV1_DECODER
2302 if (got_data && app_input.decode) {
2303 // Don't look for mismatch on top spatial and top temporal layers as
2304 // they are non reference frames.
2305 if ((ss_number_layers > 1 || ts_number_layers > 1) &&
2306 !(layer_id.temporal_layer_id > 0 &&
2307 layer_id.temporal_layer_id == ts_number_layers - 1)) {
2308 if (test_decode(&codec, &decoder, frame_cnt)) {
2309#if CONFIG_INTERNAL_STATS
2310 fprintf(stats_file, "First mismatch occurred in frame %d\n",
2311 frame_cnt);
2312 fclose(stats_file);
2313#endif
2314 fatal("Mismatch seen");
2315 }
2316 }
2317 }
2318#endif
2319 } // loop over spatial layers
2320 ++frame_cnt;
2321 pts += frame_duration;
2322 }
2323
2324 for (i = 0; i < MAX_NUM_SPATIAL_LAYERS; ++i) {
2325 if (app_input.input_ctx[i].filename == NULL) {
2326 break;
2327 }
2328 close_input_file(&(app_input.input_ctx[i]));
2329 }
2330 printout_rate_control_summary(&rc, frame_cnt, ss_number_layers,
2331 ts_number_layers);
2332
2333 printf("\n");
2334 for (int slx = 0; slx < ss_number_layers; slx++)
2335 for (int tlx = 0; tlx < ts_number_layers; tlx++) {
2336 int lx = slx * ts_number_layers + tlx;
2337 printf("Per layer encoding time/FPS stats for encoder: %d %d %d %f %f \n",
2338 slx, tlx, frame_cnt_layer[lx],
2339 (float)cx_time_layer[lx] / (double)(frame_cnt_layer[lx] * 1000),
2340 1000000 * (double)frame_cnt_layer[lx] / (double)cx_time_layer[lx]);
2341 }
2342
2343 printf("\n");
2344 printf("Frame cnt and encoding time/FPS stats for encoding: %d %f %f\n",
2345 frame_cnt, 1000 * (float)cx_time / (double)(frame_cnt * 1000000),
2346 1000000 * (double)frame_cnt / (double)cx_time);
2347
2348 if (app_input.show_psnr) {
2349 show_psnr(&psnr_stream, 255.0);
2350 }
2351
2352 if (aom_codec_destroy(&codec)) die_codec(&codec, "Failed to destroy encoder");
2353
2354#if CONFIG_AV1_DECODER
2355 if (app_input.decode) {
2356 if (aom_codec_destroy(&decoder))
2357 die_codec(&decoder, "Failed to destroy decoder");
2358 }
2359#endif
2360
2361#if CONFIG_INTERNAL_STATS
2362 fprintf(stats_file, "No mismatch detected in recon buffers\n");
2363 fclose(stats_file);
2364#endif
2365
// NOTE(review): only the video writers are closed here. The FILE handles
// opened with fopen() in OBU mode (obu_files[] and total_layer_obu_file) have
// no visible fclose() in this function; confirm whether that is intended.
2366 // Try to rewrite the output file headers with the actual frame count.
2367 for (i = 0; i < ss_number_layers * ts_number_layers; ++i)
2368 aom_video_writer_close(outfile[i]);
2369 aom_video_writer_close(total_layer_file);
2370
2371 if (has_non_y4m_input) {
2372 aom_img_free(&raw);
2373 }
2374 return EXIT_SUCCESS;
2375}
Describes the decoder algorithm interface to applications.
Describes the encoder algorithm interface to applications.
Describes the aom image descriptor and associated operations.
@ AOM_MIF_KEY_FRAME
Definition aom_image.h:166
@ AOM_CSP_UNKNOWN
Definition aom_image.h:143
enum aom_chroma_sample_position aom_chroma_sample_position_t
List of chroma sample positions.
#define AOM_IMG_FMT_HIGHBITDEPTH
Definition aom_image.h:38
aom_image_t * aom_img_alloc(aom_image_t *img, aom_img_fmt_t fmt, unsigned int d_w, unsigned int d_h, unsigned int align)
Open a descriptor, allocating storage for the underlying image.
@ AOM_IMG_FMT_I420
Definition aom_image.h:45
enum aom_img_fmt aom_img_fmt_t
List of supported image formats.
int aom_img_add_metadata(aom_image_t *img, uint32_t type, const uint8_t *data, size_t sz, aom_metadata_insert_flags_t insert_flag)
Add metadata to image.
void aom_img_free(aom_image_t *img)
Close an image descriptor.
Provides definitions for using AOM or AV1 encoder algorithm within the aom Codec Interface.
#define AOM_MAX_LAYERS
Definition aomcx.h:1713
#define AOM_MAX_TS_LAYERS
Definition aomcx.h:1715
aom_codec_iface_t * aom_codec_av1_cx(void)
The interface to the AV1 encoder.
@ AOM_FULL_SUPERFRAME_DROP
Definition aomcx.h:1775
@ AV1E_SET_BITRATE_ONE_PASS_CBR
Codec control to set the target bitrate in kilobits per second, unsigned int parameter....
Definition aomcx.h:1533
@ AV1E_SET_ENABLE_SMOOTH_INTRA
Codec control function to turn on / off smooth intra modes usage, int parameter.
Definition aomcx.h:1076
@ AV1E_SET_ENABLE_TPL_MODEL
Codec control function to enable RDO modulated by frame temporal dependency, unsigned int parameter.
Definition aomcx.h:414
@ AV1E_SET_AQ_MODE
Codec control function to set adaptive quantization mode, unsigned int parameter.
Definition aomcx.h:474
@ AV1E_SET_SVC_LAYER_ID
Codec control function to set the layer id, aom_svc_layer_id_t* parameter.
Definition aomcx.h:1282
@ AV1E_SET_SVC_REF_FRAME_CONFIG
Codec control function to set the reference frame config, aom_svc_ref_frame_config_t* parameter.
Definition aomcx.h:1292
@ AV1E_SET_TUNE_CONTENT
Codec control function to set content type, aom_tune_content parameter.
Definition aomcx.h:503
@ AV1E_SET_CDF_UPDATE_MODE
Codec control function to set CDF update mode, unsigned int parameter.
Definition aomcx.h:512
@ AV1E_SET_ENABLE_ANGLE_DELTA
Codec control function to turn on/off intra angle delta, int parameter.
Definition aomcx.h:1123
@ AV1E_SET_MV_COST_UPD_FREQ
Control to set frequency of the cost updates for motion vectors, unsigned int parameter.
Definition aomcx.h:1260
@ AV1E_SET_INTRA_DEFAULT_TX_ONLY
Control to use default tx type only for intra modes, int parameter.
Definition aomcx.h:1209
@ AV1E_SET_SVC_REF_FRAME_COMP_PRED
Codec control function to set reference frame compound prediction. aom_svc_ref_frame_comp_pred_t* par...
Definition aomcx.h:1397
@ AV1E_SET_ENABLE_INTRABC
Codec control function to turn on/off intra block copy mode, int parameter.
Definition aomcx.h:1119
@ AV1E_SET_ENABLE_WARPED_MOTION
Codec control function to turn on / off warped motion usage at sequence level, int parameter.
Definition aomcx.h:1044
@ AV1E_SET_RTC_EXTERNAL_RC
Codec control function to set flag for rate control used by external encoders.
Definition aomcx.h:1432
@ AV1E_SET_COEFF_COST_UPD_FREQ
Control to set frequency of the cost updates for coefficients, unsigned int parameter.
Definition aomcx.h:1240
@ AV1E_SET_ENABLE_CDEF
Codec control function to encode with CDEF, unsigned int parameter.
Definition aomcx.h:676
@ AOME_SET_ACTIVEMAP
Codec control function to pass an Active map to encoder, aom_active_map_t* parameter.
Definition aomcx.h:190
@ AV1E_SET_DV_COST_UPD_FREQ
Control to set frequency of the cost updates for intrabc motion vectors, unsigned int parameter.
Definition aomcx.h:1363
@ AV1E_SET_SVC_FRAME_DROP_MODE
Codec control to set the frame drop mode for SVC, unsigned int parameter. The valid values are consta...
Definition aomcx.h:1546
@ AV1E_SET_SVC_PARAMS
Codec control function to set SVC parameters, aom_svc_params_t* parameter.
Definition aomcx.h:1287
@ AV1E_SET_ENABLE_FILTER_INTRA
Codec control function to turn on / off filter intra usage at sequence level, int parameter.
Definition aomcx.h:1065
@ AV1E_SET_ENABLE_PALETTE
Codec control function to turn on/off palette mode, int parameter.
Definition aomcx.h:1115
@ AV1E_SET_ENABLE_CFL_INTRA
Codec control function to turn on / off CFL uv intra mode usage, int parameter.
Definition aomcx.h:1094
@ AOME_SET_MAX_INTRA_BITRATE_PCT
Codec control function to set max data rate for intra frames, unsigned int parameter.
Definition aomcx.h:312
@ AV1E_SET_ERROR_RESILIENT_MODE
Codec control function to enable error_resilient_mode, int parameter.
Definition aomcx.h:448
@ AV1E_SET_ENABLE_OBMC
Codec control function to predict with OBMC mode, unsigned int parameter.
Definition aomcx.h:703
@ AV1E_SET_AUTO_TILES
Codec control to set auto tiling, unsigned int parameter. Value of 1 means encoder will set number of...
Definition aomcx.h:1554
@ AV1E_SET_LOOPFILTER_CONTROL
Codec control to control loop filter.
Definition aomcx.h:1412
@ AOME_SET_SCALEMODE
Codec control function to set encoder scaling mode for the next frame to be coded, aom_scaling_mode_t* parameter.
Definition aomcx.h:197
@ AV1E_SET_ENABLE_ORDER_HINT
Codec control function to turn on / off frame order hint (int parameter). Affects: joint compound mod...
Definition aomcx.h:871
@ AV1E_SET_DELTAQ_MODE
Codec control function to set the delta q mode, unsigned int parameter.
Definition aomcx.h:1137
@ AV1E_SET_POSTENCODE_DROP_RTC
Codec control to enable post encode frame drop for RTC encoding, int parameter.
Definition aomcx.h:1570
@ AV1E_SET_ENABLE_GLOBAL_MOTION
Codec control function to turn on / off global motion usage for a sequence, int parameter.
Definition aomcx.h:1034
@ AOME_SET_CPUUSED
Codec control function to set encoder internal speed settings, int parameter.
Definition aomcx.h:220
@ AV1E_GET_HIGH_MOTION_CONTENT_SCREEN_RTC
Codec control to get the high motion content flag, used for screen content realtime (RTC) encoding,...
Definition aomcx.h:1561
@ AV1E_SET_GF_CBR_BOOST_PCT
Boost percentage for Golden Frame in CBR mode, unsigned int parameter.
Definition aomcx.h:345
@ AV1E_SET_QUANTIZER_ONE_PASS
Codec control to set quantizer for the next frame, int parameter.
Definition aomcx.h:1495
@ AV1E_SET_MODE_COST_UPD_FREQ
Control to set frequency of the cost updates for mode, unsigned int parameter.
Definition aomcx.h:1250
@ AV1E_SET_MAX_CONSEC_FRAME_DROP_MS_CBR
Codec control to set the maximum number of consecutive frame drops, in units of time (milliseconds),...
Definition aomcx.h:1576
@ AV1_GET_NEW_FRAME_IMAGE
Codec control function to get a pointer to the new frame.
Definition aom.h:70
const char * aom_codec_iface_name(aom_codec_iface_t *iface)
Return the name for a given interface.
enum aom_bit_depth aom_bit_depth_t
Bit depth for codec. This enumeration determines the bit depth of the codec.
aom_codec_err_t aom_codec_control(aom_codec_ctx_t *ctx, int ctrl_id,...)
Algorithm Control.
long aom_codec_flags_t
Initialization-time Feature Enabling.
Definition aom_codec.h:232
const struct aom_codec_iface aom_codec_iface_t
Codec interface structure.
Definition aom_codec.h:271
aom_codec_err_t aom_codec_destroy(aom_codec_ctx_t *ctx)
Destroy a codec instance.
const char * aom_codec_err_to_string(aom_codec_err_t err)
Convert error number to printable string.
aom_codec_err_t
Algorithm return codes.
Definition aom_codec.h:155
#define AOM_CODEC_CONTROL_TYPECHECKED(ctx, id, data)
aom_codec_control wrapper macro (adds type-checking, less flexible)
Definition aom_codec.h:542
const void * aom_codec_iter_t
Iterator.
Definition aom_codec.h:305
#define AOM_FRAME_IS_KEY
Definition aom_codec.h:288
@ AOM_BITS_8
Definition aom_codec.h:336
@ AOM_BITS_10
Definition aom_codec.h:337
@ AOM_CODEC_INVALID_PARAM
An application-supplied parameter is not valid.
Definition aom_codec.h:200
@ AOM_CODEC_MEM_ERROR
Memory operation failed.
Definition aom_codec.h:163
@ AOM_CODEC_OK
Operation completed without error.
Definition aom_codec.h:157
aom_codec_err_t aom_codec_decode(aom_codec_ctx_t *ctx, const uint8_t *data, size_t data_sz, void *user_priv)
Decode data.
#define aom_codec_dec_init(ctx, iface, cfg, flags)
Convenience macro for aom_codec_dec_init_ver()
Definition aom_decoder.h:129
const aom_codec_cx_pkt_t * aom_codec_get_cx_data(aom_codec_ctx_t *ctx, aom_codec_iter_t *iter)
Encoded data iterator.
aom_codec_err_t aom_codec_encode(aom_codec_ctx_t *ctx, const aom_image_t *img, aom_codec_pts_t pts, unsigned long duration, aom_enc_frame_flags_t flags)
Encode a frame.
#define aom_codec_enc_init(ctx, iface, cfg, flags)
Convenience macro for aom_codec_enc_init_ver()
Definition aom_encoder.h:943
aom_codec_err_t aom_codec_enc_config_default(aom_codec_iface_t *iface, aom_codec_enc_cfg_t *cfg, unsigned int usage)
Get the default configuration for a usage.
#define AOM_USAGE_REALTIME
usage parameter analogous to AV1 REALTIME mode.
Definition aom_encoder.h:1016
#define AOM_CODEC_USE_HIGHBITDEPTH
Definition aom_encoder.h:80
#define AOM_CODEC_USE_PSNR
Initialization-time Feature Enabling.
Definition aom_encoder.h:79
@ AOM_CBR
Definition aom_encoder.h:187
@ AOM_KF_AUTO
Definition aom_encoder.h:202
@ AOM_CODEC_PSNR_PKT
Definition aom_encoder.h:113
@ AOM_CODEC_CX_FRAME_PKT
Definition aom_encoder.h:110
aom active region map
Definition aomcx.h:1632
unsigned int rows
Definition aomcx.h:1635
unsigned int cols
Definition aomcx.h:1636
unsigned char * active_map
Specifies on (1) or off (0) for each 16x16 region within a frame.
Definition aomcx.h:1634
Codec context structure.
Definition aom_codec.h:315
Encoder output packet.
Definition aom_encoder.h:122
size_t sz
Definition aom_encoder.h:127
enum aom_codec_cx_pkt_kind kind
Definition aom_encoder.h:123
double psnr[4]
Definition aom_encoder.h:145
union aom_codec_cx_pkt::@1 data
struct aom_codec_cx_pkt::@1::@2 frame
aom_codec_frame_flags_t flags
Definition aom_encoder.h:132
void * buf
Definition aom_encoder.h:126
Encoder configuration structure.
Definition aom_encoder.h:387
unsigned int g_input_bit_depth
Bit-depth of the input frames.
Definition aom_encoder.h:475
unsigned int rc_dropframe_thresh
Temporal resampling configuration, if supported by the codec.
Definition aom_encoder.h:540
struct aom_rational g_timebase
Stream timebase units.
Definition aom_encoder.h:489
unsigned int g_usage
Algorithm specific "usage" value.
Definition aom_encoder.h:399
unsigned int rc_buf_sz
Decoder Buffer Size.
Definition aom_encoder.h:705
unsigned int g_h
Height of the frame.
Definition aom_encoder.h:435
enum aom_kf_mode kf_mode
Keyframe placement mode.
Definition aom_encoder.h:768
enum aom_rc_mode rc_end_usage
Rate control algorithm to use.
Definition aom_encoder.h:623
unsigned int g_threads
Maximum number of threads to use.
Definition aom_encoder.h:407
unsigned int kf_min_dist
Keyframe minimum interval.
Definition aom_encoder.h:777
unsigned int g_lag_in_frames
Allow lagged encoding.
Definition aom_encoder.h:518
unsigned int rc_buf_initial_sz
Decoder Buffer Initial Size.
Definition aom_encoder.h:714
unsigned int g_profile
Bitstream profile to use.
Definition aom_encoder.h:417
aom_bit_depth_t g_bit_depth
Bit-depth of the codec.
Definition aom_encoder.h:467
unsigned int g_w
Width of the frame.
Definition aom_encoder.h:426
unsigned int rc_undershoot_pct
Rate control adaptation undershoot control.
Definition aom_encoder.h:681
unsigned int kf_max_dist
Keyframe maximum interval.
Definition aom_encoder.h:786
aom_codec_er_flags_t g_error_resilient
Enable error resilient modes.
Definition aom_encoder.h:497
unsigned int rc_max_quantizer
Maximum (Worst Quality) Quantizer.
Definition aom_encoder.h:668
unsigned int rc_buf_optimal_sz
Decoder Buffer Optimal Size.
Definition aom_encoder.h:723
unsigned int rc_min_quantizer
Minimum (Best Quality) Quantizer.
Definition aom_encoder.h:658
unsigned int rc_target_bitrate
Target data rate.
Definition aom_encoder.h:644
unsigned int rc_resize_mode
Mode for spatial resampling, if supported by the codec.
Definition aom_encoder.h:549
unsigned int rc_overshoot_pct
Rate control adaptation overshoot control.
Definition aom_encoder.h:690
Image Descriptor.
Definition aom_image.h:182
aom_img_fmt_t fmt
Definition aom_image.h:183
unsigned int d_w
Definition aom_image.h:197
unsigned int d_h
Definition aom_image.h:198
int num
Definition aom_encoder.h:165
int den
Definition aom_encoder.h:166
aom image scaling mode
Definition aomcx.h:1644
Definition aomcx.h:1718
int temporal_layer_id
Definition aomcx.h:1720
int spatial_layer_id
Definition aomcx.h:1719
Definition aomcx.h:1729
int max_quantizers[32]
Definition aomcx.h:1732
int number_spatial_layers
Definition aomcx.h:1730
int layer_target_bitrate[32]
Definition aomcx.h:1737
int framerate_factor[8]
Definition aomcx.h:1739
int min_quantizers[32]
Definition aomcx.h:1733
int scaling_factor_den[4]
Definition aomcx.h:1735
int number_temporal_layers
Definition aomcx.h:1731
int scaling_factor_num[4]
Definition aomcx.h:1734
Definition aomcx.h:1766
int use_comp_pred[3]
Definition aomcx.h:1769
Definition aomcx.h:1743
int reference[7]
Definition aomcx.h:1759
int refresh[8]
Definition aomcx.h:1762
int ref_idx[7]
Definition aomcx.h:1761